KeyError: 'layers.0.experts.0.down_proj.weight' on vllm/vllm-openai:gemma4

#1
by timbmg - opened

Hi, thanks for the FP8 checkpoint!

I tried to run it on vllm/vllm-openai:gemma4, which vllm recommends currently for gemm4. However, I got a KeyError: 'layers.0.experts.0.down_proj.weight' when loading then model. Full stack-trace below:

WorkerProc failed to start.
Traceback (most recent call last):
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 826, in worker_main
worker = WorkerProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 613, in init
self.worker.load_model()
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 323, in load_model
self.model_runner.load_model(load_dummy_weights=load_dummy_weights)
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 4749, in load_model
self.model = model_loader.load_model(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 64, in load_model
self.load_weights(model, model_config)
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 381, in load_weights
loaded_weights = model.load_weights(self.get_all_weights(model_config, model))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma4_mm.py", line 1319, in load_weights
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/reload/torchao_decorator.py", line 50, in patched_model_load_weights
return original_load_weights(self, weights, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 355, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 302, in _load_module
yield from self._load_module(
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 275, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma4.py", line 1239, in load_weights
return loader.load_weights(_weight_iterator())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/reload/torchao_decorator.py", line 50, in patched_model_load_weights
return original_load_weights(self, weights, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 355, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 302, in _load_module
yield from self._load_module(
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 275, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma4.py", line 1037, in load_weights
param = params_dict[name]
~~~~~~~~~~~^^^^^^
KeyError: 'layers.0.experts.0.down_proj.weight'
WorkerProc failed to start.
Traceback (most recent call last):
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 826, in worker_main
worker = WorkerProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 613, in init
self.worker.load_model()
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 323, in load_model
self.model_runner.load_model(load_dummy_weights=load_dummy_weights)
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 4749, in load_model
self.model = model_loader.load_model(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 64, in load_model
self.load_weights(model, model_config)
File "/usr/local/lib/python3.12/dist-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 381, in load_weights
loaded_weights = model.load_weights(self.get_all_weights(model_config, model))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma4_mm.py", line 1319, in load_weights
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/reload/torchao_decorator.py", line 50, in patched_model_load_weights
return original_load_weights(self, weights, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 355, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 302, in _load_module
yield from self._load_module(
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 275, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma4.py", line 1239, in load_weights
return loader.load_weights(_weight_iterator())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/reload/torchao_decorator.py", line 50, in patched_model_load_weights
return original_load_weights(self, weights, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 355, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 302, in _load_module
yield from self._load_module(
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 275, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma4.py", line 1037, in load_weights
param = params_dict[name]
~~~~~~~~~~~^^^^^^
KeyError: 'layers.0.experts.0.down_proj.weight'

I got the same problem

Also got the same problem

I got the same problem

Sign up or log in to comment