# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# `importlib.util` must be imported explicitly: a bare `import importlib` does
# not guarantee that the `util` submodule is bound as an attribute.
import importlib.util
import logging
import os

from packaging.version import parse as parse_version

from .protocol import DataProto
from .utils.device import is_npu_available
from .utils.import_utils import import_external_libs
from .utils.logging_utils import set_basic_config

# The version string lives in a plain-text file next to this module.
version_folder = os.path.dirname(os.path.abspath(__file__))

with open(os.path.join(version_folder, "version/version"), encoding="utf-8") as f:
    __version__ = f.read().strip()


set_basic_config(level=logging.WARNING)


__all__ = ["DataProto", "__version__"]


# Optional: comma-separated list of extra modules to import at start-up.
modules = os.getenv("VERL_USE_EXTERNAL_MODULES", "")
if modules:
    # Tolerate whitespace around names and stray/trailing commas so that values
    # such as "pkg_a, pkg_b," do not produce invalid (blank or padded) names.
    modules = [name.strip() for name in modules.split(",") if name.strip()]
    import_external_libs(modules)


# Optional: route hub downloads through modelscope.
if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true":
    if importlib.util.find_spec("modelscope") is None:
        raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`")
    # Patch hub to download models from modelscope to speed up.
    from modelscope.utils.hf_util import patch_hub

    patch_hub()


if is_npu_available:
    # Workaround for torch-npu's lack of support for creating nested tensors from NPU tensors.
    #
    # ```
    # >>> a, b = torch.arange(3).npu(), torch.arange(5).npu() + 3
    # >>> nt = torch.nested.nested_tensor([a, b], layout=torch.jagged)
    # ```
    # throws "not supported in npu" on Ascend NPU.
    # See https://github.com/Ascend/pytorch/blob/294cdf5335439b359991cecc042957458a8d38ae/torch_npu/utils/npu_intercept.py#L109
    # for details.
    import torch

    try:
        # If the functions have been wrapped, restore the underlying originals.
        if hasattr(torch.nested.nested_tensor, "__wrapped__"):
            torch.nested.nested_tensor = torch.nested.nested_tensor.__wrapped__
        if hasattr(torch.nested.as_nested_tensor, "__wrapped__"):
            torch.nested.as_nested_tensor = torch.nested.as_nested_tensor.__wrapped__
    except AttributeError:
        # Best effort: if `torch.nested` (or one of these functions) is missing,
        # there is nothing to unwrap, so leave torch untouched.
        pass

    # In verl, the driver process aggregates the computation results of workers via Ray.
    # Therefore, after a worker completes its computation job, it will package the output
    # using tensordict and transfer it to the CPU. Since the `to` operation of tensordict
    # is non-blocking, when transferring data from a device to the CPU, it is necessary to
    # ensure that a batch of data has been completely transferred before being used on the
    # host; otherwise, unexpected precision issues may arise. Tensordict has already noticed
    # this problem and fixed it. Ref: https://github.com/pytorch/tensordict/issues/725
    # However, the relevant modifications only cover CUDA and MPS devices and do not take effect
    # for third-party devices such as NPUs. This patch fixes this issue, and the relevant
    # modifications can be removed once the fix is merged into tensordict.
    import tensordict

    if parse_version(tensordict.__version__) < parse_version("0.10.0"):
        from tensordict.base import TensorDictBase

        def _sync_all_patch(self):
            """Synchronize the available accelerator device, if there is one.

            Replacement for ``TensorDictBase._sync_all``. Looks up the device
            type reported by torch and calls ``synchronize()`` on its device
            module; returns without doing anything when no device type is
            reported.
            """
            # NOTE(review): these are private torch helpers; confirm they remain
            # available for the torch versions this project supports.
            from torch._utils import _get_available_device_type, _get_device_module

            device_type = _get_available_device_type()
            if device_type is None:
                return

            device_module = _get_device_module(device_type)
            device_module.synchronize()

        TensorDictBase._sync_all = _sync_all_patch