Add basic vLLM usage

#5
by yuhao318 - opened
Files changed (1) hide show
  1. README.md +102 -0
README.md CHANGED
@@ -100,6 +100,108 @@ scores = model.process(inputs)
100
  print(scores)
101
  # [0.7838293313980103, 0.585621178150177, 0.6147719025611877]
102
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  For more usage examples, please visit our [GitHub repository](https://github.com/QwenLM/Qwen3-VL-Embedding).
104
 
105
  ## Citation
 
100
  print(scores)
101
  # [0.7838293313980103, 0.585621178150177, 0.6147719025611877]
102
  ```
103
+
104
+ ### vLLM Basic Usage Example
105
+ ```python
106
+ import argparse
107
+ import os
108
+ from pathlib import Path
109
+ from typing import Dict, Any
110
+ from vllm import LLM, EngineArgs
111
+ from vllm.entrypoints.score_utils import ScoreMultiModalParam
112
+
113
+
114
# Search queries; each entry carries the query string under "text".
queries = [{"text": "A woman playing with her dog on a beach at sunset."}]

# Candidate documents to score against each query: one text-only entry, one
# image-only entry, and one entry carrying both text and image.
documents = [
    {
        "text": (
            "A woman shares a joyful moment with her golden retriever on a "
            "sun-drenched beach at sunset, as the dog offers its paw in a "
            "heartwarming display of companionship and trust."
        ),
    },
    {
        "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
    },
    {
        "text": (
            "A woman shares a joyful moment with her golden retriever on a "
            "sun-drenched beach at sunset, as the dog offers its paw in a "
            "heartwarming display of companionship and trust."
        ),
        "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
    },
]
124
+
125
+
126
def format_document_to_score_param(doc_dict: Dict[str, Any]) -> "ScoreMultiModalParam":
    """Convert a document dict into the multimodal payload passed to ``llm.score``.

    Args:
        doc_dict: Document with an optional ``"text"`` entry and an optional
            ``"image"`` entry. A string image is either a URL/URI or a path
            to a local file; any non-string image value is forwarded unchanged.

    Returns:
        A ``{"content": [...]}`` dict holding a text part and/or an
        ``image_url`` part. When the document has neither, a single empty
        text part is returned so the content list is never empty.
    """
    # Strings with one of these prefixes are already URLs/URIs and are
    # forwarded untouched. Matching the full "scheme://" (rather than a bare
    # "http"/"oss" prefix) keeps local files such as "http_cat.png" from being
    # mistaken for remote URLs, and stops "file://" / "data:" URIs from being
    # rewritten as if they were relative paths.
    url_prefixes = ("http://", "https://", "oss://", "file://", "data:")

    content = []

    text = doc_dict.get("text")
    if text:
        content.append({"type": "text", "text": text})

    image = doc_dict.get("image")
    if image:
        image_url = image
        if isinstance(image, str) and not image.startswith(url_prefixes):
            # Local path: make it absolute and expose it as a file:// URL.
            image_url = "file://" + os.path.abspath(image)
        content.append({"type": "image_url", "image_url": {"url": image_url}})

    if not content:
        content.append({"type": "text", "text": ""})

    return {"content": content}
158
+
159
+
160
def main():
    """Score every document against every query with a vLLM pooling model.

    Parses ``--model-path``, ``--dtype`` and ``--template-path`` from the
    command line, builds the engine, loads the chat template when the file
    exists, and prints one list of scores per query.
    """
    parser = argparse.ArgumentParser(description="Offline Reranker with vLLM")
    parser.add_argument(
        "--model-path",
        type=str,
        default="models/Qwen3-VL-Reranker-8B",
        help="Path to the reranker model",
    )
    parser.add_argument(
        "--dtype",
        type=str,
        default="bfloat16",
        help="Data type (e.g., bfloat16)",
    )
    parser.add_argument(
        "--template-path",
        type=str,
        default="vllm/examples/pooling/score/template/qwen3_vl_reranker.jinja",
        help="Path to chat template file",
    )
    args = parser.parse_args()

    print(f"Loading model from {args.model_path}...")

    engine_args = EngineArgs(
        model=args.model_path,
        runner="pooling",
        dtype=args.dtype,
        trust_remote_code=True,
        hf_overrides={
            "architectures": ["Qwen3VLForSequenceClassification"],
            "classifier_from_token": ["no", "yes"],
            "is_original_qwen3_reranker": True,
        },
    )

    llm = LLM(**vars(engine_args))

    template_path = Path(args.template_path)
    if template_path.exists():
        # Read with an explicit encoding so the result does not depend on the
        # platform's locale default.
        chat_template = template_path.read_text(encoding="utf-8")
    else:
        # The default path is relative to the working directory, so a missing
        # file is easy to hit; say so instead of silently dropping the template.
        print(
            f"Warning: chat template not found at {template_path}; "
            "continuing without a custom chat template."
        )
        chat_template = None

    for query_dict in queries:
        query_text = query_dict.get('text', '')
        print(f"\nQuery: {query_text}")

        # One llm.score call per document; the first output carries its score.
        scores = [
            llm.score(
                query_text,
                format_document_to_score_param(doc_dict),
                chat_template=chat_template,
            )[0].outputs.score
            for doc_dict in documents
        ]

        print(scores)


if __name__ == "__main__":
    main()
203
+
204
+ ```
205
  For more usage examples, please visit our [GitHub repository](https://github.com/QwenLM/Qwen3-VL-Embedding).
206
 
207
  ## Citation