NeMo_Canary / nemo /export /vllm /model_loader.py
Respair's picture
Upload folder using huggingface_hub
b386992 verified
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os.path
from typing import Any, Dict
import safetensors.torch
import torch
from vllm.config import ModelConfig
from vllm.model_executor.model_loader.loader import BaseModelLoader, _initialize_model
from vllm.model_executor.model_loader.utils import set_default_torch_dtype
from nemo.export.utils import load_model_weights
from nemo.export.vllm.model_config import NemoModelConfig
LOGGER = logging.getLogger("NeMo")
class NemoModelLoader(BaseModelLoader):
"""
Implements a custom ModelLoader for vLLM that reads the weights from a Nemo checkpoint
and converts them to a vLLM compatible format at load time.
Also supports an ahead-of-time conversion that stores new weights in a Safetensors file,
see convert_and_store_nemo_weights(...)
"""
@staticmethod
def _load_nemo_checkpoint_state(nemo_file: str) -> Dict[str, Any]:
LOGGER.info(f'Loading weights from {nemo_file}...')
return load_model_weights(nemo_file)
def download_model(self, model_config: ModelConfig) -> None: # pylint: disable=missing-function-docstring
raise NotImplementedError
def load_model(
self,
*,
vllm_config: NemoModelConfig,
) -> torch.nn.Module:
"""
Overrides the load_model function from BaseModelLoader to convert Nemo weights at load time.
"""
model_config = vllm_config.model_config
device_config = vllm_config.device_config
assert isinstance(model_config, NemoModelConfig)
state_dict = NemoModelLoader._load_nemo_checkpoint_state(model_config.nemo_checkpoint)
with set_default_torch_dtype(model_config.dtype):
with torch.device(device_config.device):
model = _initialize_model(vllm_config)
config = model_config.nemo_model_config
if 'config' in config:
config = config['config']
state_dict = NemoModelLoader._standardize_nemo2_naming(state_dict)
weights_iterator = model_config.model_converter.convert_weights(config, state_dict)
model.load_weights(weights_iterator)
return model.eval()
@staticmethod
def convert_and_store_nemo_weights(model_config: NemoModelConfig, safetensors_file: str):
"""
Converts Nemo weights and stores the converted weights in a Safetensors file.
"""
assert isinstance(model_config, NemoModelConfig)
assert os.path.exists(model_config.model)
state_dict = NemoModelLoader._load_nemo_checkpoint_state(model_config.nemo_checkpoint)
config = model_config.nemo_model_config
# NeMo2 checkpoint loads the whole TrainerContext where the config is stored under 'config' key
if 'config' in config:
config = config['config']
state_dict = NemoModelLoader._standardize_nemo2_naming(state_dict)
tensors = {name: tensor for name, tensor in model_config.model_converter.convert_weights(config, state_dict)}
LOGGER.info(f'Saving weights to {safetensors_file}...')
safetensors.torch.save_file(tensors, safetensors_file)
@staticmethod
def _standardize_nemo2_naming(state_dict: Dict[str, Any]) -> Dict[str, Any]:
return {k.replace('module', 'model'): v for k, v in state_dict.items()}