# /*---------------------------------------------------------------------------------------------
#  * Copyright (c) 2025 STMicroelectronics.
#  * All rights reserved.
#  *
#  * This software is licensed under terms that can be found in the LICENSE file in
#  * the root directory of this software component.
#  * If no LICENSE file comes with this software, it is provided AS-IS.
#  *--------------------------------------------------------------------------------------------*/
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii Inc. All rights reserved.

import os
import subprocess
from loguru import logger

import torch
import cv2

from .dist import get_world_size, is_main_process
from datetime import timedelta
import torch.distributed as dist

__all__ = ["configure_nccl", "configure_module", "configure_omp", "init_distributed_mode"]


def configure_nccl():
    """Configure multi-machine environment variables of NCCL.

    Sets ``NCCL_LAUNCH_MODE``, ``NCCL_IB_HCA``, ``NCCL_IB_GID_INDEX`` and
    ``NCCL_IB_TC`` in ``os.environ``. ``NCCL_IB_HCA`` is filled with the
    ``mlx5_*`` entries under ``/sys/class/infiniband/`` whose port-1 GID type
    files contain the letter ``v``; it is set to an empty string when no such
    entry is found.
    """
    os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL"
    # `subprocess.getoutput` runs the command through the system shell, which
    # is not necessarily bash, so the bash-only `pushd`/`popd` built-ins are
    # avoided. A plain `cd` is enough because it only affects the child shell.
    hca_listing = subprocess.getoutput(
        "cd /sys/class/infiniband/ 2>/dev/null && for i in mlx5_*; "
        "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null "
        "| grep v >/dev/null && echo $i ; done"
    )
    # The shell prints one device per line; NCCL expects a comma-separated list.
    os.environ["NCCL_IB_HCA"] = ",".join(hca_listing.split())
    os.environ["NCCL_IB_GID_INDEX"] = "3"
    os.environ["NCCL_IB_TC"] = "106"


def configure_omp(num_threads=1):
    """
    Set the `OMP_NUM_THREADS` environment variable to `num_threads`.

    The variable is only written when it is not already present in the
    environment and the world size is greater than 1. The main process logs
    the value that was applied.

    Args:
        num_threads (int): value of `OMP_NUM_THREADS` to set.
    """
    # We set OMP_NUM_THREADS=1 by default, which achieves the best speed on our machines
    # feel free to change it for better performance.
    if "OMP_NUM_THREADS" not in os.environ and get_world_size() > 1:
        os.environ["OMP_NUM_THREADS"] = str(num_threads)
        if is_main_process():
            logger.info(
                "\n***************************************************************\n"
                "We set `OMP_NUM_THREADS` for each process to {} to speed up.\n"
                "please further tune the variable for optimal performance.\n"
                "***************************************************************".format(
                    os.environ["OMP_NUM_THREADS"]
                )
            )


def configure_module(ulimit_value=8192):
    """
    Configure pytorch module environment. setting of ulimit and cv2 will be set.

    Both steps are best-effort: any exception raised while applying them is
    ignored so that the caller can continue.

    Args:
        ulimit_value(int): default open file number on linux. Default value: 8192.
    """
    # system setting
    try:
        import resource

        rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
        # Only the soft limit is changed; the hard limit is passed back as read.
        resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1]))
    except Exception:
        # Exception might be raised in Windows OS or rlimit reaches max limit number.
        # However, set rlimit value might not be necessary.
        pass

    # cv2
    # multiprocess might be harmful on performance of torch dataloader
    os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
    try:
        cv2.setNumThreads(0)
        cv2.ocl.setUseOpenCL(False)
    except Exception:
        # cv2 version mismatch might raise exceptions.
        pass


def init_distributed_mode(backend="nccl", timeout=timedelta(minutes=30)):
    """
    Initialize torch.distributed using environment variables set by torchrun.

    When both `RANK` and `WORLD_SIZE` are present in the environment, the
    process group is initialized with the `env://` method and the CUDA device
    is set to the local rank. Otherwise single-process defaults are used and
    no process group is created.

    Args:
        backend (str): backend passed to `dist.init_process_group`.
        timeout (timedelta): timeout passed to `dist.init_process_group`.

    Returns:
        rank, world_size, local_rank
    """
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        rank = int(os.environ["RANK"])
        world_size = int(os.environ["WORLD_SIZE"])
        dist.init_process_group(
            backend=backend,
            init_method="env://",
            world_size=world_size,
            rank=rank,
            timeout=timeout,
        )
        # LOCAL_RANK falls back to 0 when the launcher did not export it.
        local_rank = int(os.getenv("LOCAL_RANK", 0))
        torch.cuda.set_device(local_rank)
    else:
        rank = 0
        world_size = 1
        local_rank = 0