Add basic vllm usage

#6
by yuhao318 - opened
Files changed (1) hide show
  1. README.md +103 -0
README.md CHANGED
@@ -101,6 +101,109 @@ scores = model.process(inputs)
101
  print(scores)
102
  # [0.8613124489784241, 0.6757137179374695, 0.8125371336936951]
103
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  For more usage examples, please visit our [GitHub repository](https://github.com/QwenLM/Qwen3-VL-Embedding).
105
 
106
  ## Citation
 
101
  print(scores)
102
  # [0.8613124489784241, 0.6757137179374695, 0.8125371336936951]
103
  ```
104
+
105
+ ### vLLM Basic Usage Example
106
+ ```python
107
+ import argparse
108
+ import os
109
+ from pathlib import Path
110
+ from typing import Dict, Any
111
+ from vllm import LLM, EngineArgs
112
+ from vllm.entrypoints.score_utils import ScoreMultiModalParam
113
+
114
+
115
# Sample caption and image shared by the text-only, image-only and combined
# documents below, named once so the three entries cannot drift apart.
_CAPTION = "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust."
_IMAGE_URL = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"

# Queries to score; each entry is a dict carrying a "text" field.
queries = [
    {"text": "A woman playing with her dog on a beach at sunset."},
]

# Candidate documents: text only, image only, and text + image.
documents = [
    {"text": _CAPTION},
    {"image": _IMAGE_URL},
    {"text": _CAPTION, "image": _IMAGE_URL},
]
125
+
126
+
127
def _has_uri_scheme(value: str) -> bool:
    """Return True if *value* already is a URI (``scheme://...`` or ``data:``)."""
    if value.startswith("data:"):
        return True
    scheme, sep, _ = value.partition("://")
    # Require an alphanumeric scheme that starts with a letter, so local paths
    # such as "./a/b://c" or "oss_images/cat.png" are not mistaken for URLs.
    return bool(sep) and scheme.isalnum() and scheme[0].isalpha()


def format_document_to_score_param(doc_dict: Dict[str, Any]) -> ScoreMultiModalParam:
    """Convert a ``{"text": ..., "image": ...}`` document into a score param.

    Args:
        doc_dict: Document with an optional ``"text"`` entry and an optional
            ``"image"`` entry. A string image may be a URI (``http(s)://``,
            ``oss://``, ``file://``, ``data:`` ...) or a local filesystem path.
            Non-string image values are forwarded unchanged.

    Returns:
        A dict ``{"content": [...]}`` holding a ``"text"`` part and/or an
        ``"image_url"`` part. When the document has neither, a single empty
        text part is emitted so the content list is never empty.
    """
    content = []

    text = doc_dict.get("text")
    image = doc_dict.get("image")

    if text:
        content.append({"type": "text", "text": text})

    if image:
        image_url = image
        # Only bare local paths are rewritten. Strings that already carry a
        # scheme (including file:// and data:) are passed through untouched;
        # a plain prefix test on "http"/"oss" would misclassify local paths
        # like "oss_images/cat.png" and double-prefix "file://..." inputs.
        if isinstance(image, str) and not _has_uri_scheme(image):
            # NOTE(review): vLLM presumably needs local media access enabled
            # (e.g. allowed_local_media_path) to load file:// URLs - confirm.
            image_url = "file://" + os.path.abspath(image)

        content.append({"type": "image_url", "image_url": {"url": image_url}})

    if not content:
        content.append({"type": "text", "text": ""})

    return {"content": content}
159
+
160
+
161
def main():
    """Score every sample query against every sample document with vLLM.

    Parses the command-line options, builds a vLLM engine with the pooling
    runner, optionally loads a chat template from disk, then prints one list
    of scores per query (one score per entry in ``documents``).
    """
    parser = argparse.ArgumentParser(description="Offline Reranker with vLLM")
    parser.add_argument(
        "--model-path",
        type=str,
        default="models/Qwen3-VL-Reranker-2B",
        help="Path to the reranker model",
    )
    parser.add_argument(
        "--dtype",
        type=str,
        default="bfloat16",
        help="Data type (e.g., bfloat16)",
    )
    parser.add_argument(
        "--template-path",
        type=str,
        default="vllm/examples/pooling/score/template/qwen3_vl_reranker.jinja",
        help="Path to chat template file",
    )
    args = parser.parse_args()

    print(f"Loading model from {args.model_path}...")

    engine_args = EngineArgs(
        model=args.model_path,
        runner="pooling",
        dtype=args.dtype,
        trust_remote_code=True,
        # NOTE(review): these overrides presumably make vLLM load the
        # checkpoint as a sequence classifier derived from the "no"/"yes"
        # tokens - confirm against the vLLM reranker documentation.
        hf_overrides={
            "architectures": ["Qwen3VLForSequenceClassification"],
            "classifier_from_token": ["no", "yes"],
            "is_original_qwen3_reranker": True,
        },
    )

    llm = LLM(**vars(engine_args))

    template_path = Path(args.template_path)
    if template_path.exists():
        # Read explicitly as UTF-8 so the template does not depend on the
        # platform's default encoding.
        chat_template = template_path.read_text(encoding="utf-8")
    else:
        # The default path only exists relative to a vLLM source checkout, so
        # make the fallback visible instead of silently dropping the template.
        chat_template = None
        print(
            f"Warning: chat template not found at {template_path}; "
            "calling llm.score with chat_template=None."
        )

    for query_dict in queries:
        query_text = query_dict.get("text", "")
        print(f"\nQuery: {query_text}")

        scores = []
        for doc_dict in documents:
            doc_param = format_document_to_score_param(doc_dict)
            outputs = llm.score(query_text, doc_param, chat_template=chat_template)
            # One (query, document) pair is scored per call; take its result.
            scores.append(outputs[0].outputs.score)

        print(scores)


if __name__ == "__main__":
    main()
204
+
205
+ ```
206
+
207
  For more usage examples, please visit our [GitHub repository](https://github.com/QwenLM/Qwen3-VL-Embedding).
208
 
209
  ## Citation