Which vLLM version supports Qwen/Qwen3-VL-Embedding-2B?

#10
by xinquan - opened

vllm=0.11.0 reports the following error:
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] EngineCore failed to start.
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] Traceback (most recent call last):
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 699, in run_engine_core
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 498, in __init__
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 92, in __init__
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] self._initialize_kv_caches(vllm_config)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 190, in _initialize_kv_caches
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] self.model_executor.determine_available_memory())
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/executor/abstract.py", line 85, in determine_available_memory
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return self.collective_rpc("determine_available_memory")
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 83, in collective_rpc
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return [run_method(self.driver_worker, method, args, kwargs)]
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/utils/__init__.py", line 3122, in run_method
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return func(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return func(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] self.model_runner.profile_run()
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3361, in profile_run
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] self.model.get_multimodal_embeddings(
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1381, in get_multimodal_embeddings
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] video_embeddings = self._process_video_input(multimodal_input)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1335, in _process_video_input
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] video_embeds = self.visual(pixel_values_videos,
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/model_executor/models/qwen3_vl.py", line 517, in forward
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] hidden_states = blk(hidden_states,
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/model_executor/models/qwen3_vl.py", line 200, in forward
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] x = x + self.attn(self.norm1(x),
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/model_executor/models/qwen2_5_vl.py", line 369, in forward
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] output = flash_attn_varlen_func(q,
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/vllm/vllm_flash_attn/flash_attn_interface.py", line 233, in flash_attn_varlen_func
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] out, softmax_lse = torch.ops._vllm_fa2_C.varlen_fwd(
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] File "/opt/conda/envs/qwen3vl/lib/python3.10/site-packages/torch/_ops.py", line 1243, in __call__
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] return self._op(*args, **kwargs)
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] torch.AcceleratorError: CUDA error: the provided PTX was compiled with an unsupported toolchain.
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] For debugging consider passing CUDA_LAUNCH_BLOCKING=1
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708] Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
(EngineCore_DP0 pid=919167) ERROR 01-12 16:21:42 [core.py:708]

This worked for me:
vllm=0.14.1

import os
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

Sign up or log in to comment