| # Adapted from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/distributed/device_communicators/cuda_wrapper.py | |
| """This file is a pure Python wrapper for the cudart library. | |
| It avoids the need to compile a separate shared library, and is | |
| convenient for use when we just need to call a few functions. | |
| """ | |
| import ctypes | |
| import logging | |
| from dataclasses import dataclass | |
| from typing import Any, Dict, List, Optional | |
| # this line makes it possible to directly load `libcudart.so` using `ctypes` | |
| import torch # noqa | |
| logger = logging.getLogger(__name__) | |
| # === export types and functions from cudart to Python === | |
| # for the original cudart definition, please check | |
| # https://docs.nvidia.com/cuda/cuda-runtime-api/index.html | |
| cudaError_t = ctypes.c_int | |
| cudaMemcpyKind = ctypes.c_int | |
| class cudaIpcMemHandle_t(ctypes.Structure): | |
| _fields_ = [("internal", ctypes.c_byte * 128)] | |
| class Function: | |
| name: str | |
| restype: Any | |
| argtypes: List[Any] | |
| def find_loaded_library(lib_name) -> Optional[str]: | |
| """ | |
| According to according to https://man7.org/linux/man-pages/man5/proc_pid_maps.5.html, | |
| the file `/proc/self/maps` contains the memory maps of the process, which includes the | |
| shared libraries loaded by the process. We can use this file to find the path of the | |
| a loaded library. | |
| """ # noqa | |
| found = False | |
| with open("/proc/self/maps") as f: | |
| for line in f: | |
| if lib_name in line: | |
| found = True | |
| break | |
| if not found: | |
| # the library is not loaded in the current process | |
| return None | |
| # if lib_name is libcudart, we need to match a line with: | |
| # address /path/to/libcudart-hash.so.11.0 | |
| start = line.index("/") | |
| path = line[start:].strip() | |
| filename = path.split("/")[-1] | |
| assert filename.rpartition(".so")[0].startswith( | |
| lib_name | |
| ), f"Unexpected filename: {filename} for library {lib_name}" | |
| return path | |
| class CudaRTLibrary: | |
| exported_functions = [ | |
| # cudaError_t cudaSetDevice ( int device ) | |
| Function("cudaSetDevice", cudaError_t, [ctypes.c_int]), | |
| # cudaError_t cudaDeviceSynchronize ( void ) | |
| Function("cudaDeviceSynchronize", cudaError_t, []), | |
| # cudaError_t cudaDeviceReset ( void ) | |
| Function("cudaDeviceReset", cudaError_t, []), | |
| # const char* cudaGetErrorString ( cudaError_t error ) | |
| Function("cudaGetErrorString", ctypes.c_char_p, [cudaError_t]), | |
| # cudaError_t cudaMalloc ( void** devPtr, size_t size ) | |
| Function( | |
| "cudaMalloc", | |
| cudaError_t, | |
| [ctypes.POINTER(ctypes.c_void_p), ctypes.c_size_t], | |
| ), | |
| # cudaError_t cudaFree ( void* devPtr ) | |
| Function("cudaFree", cudaError_t, [ctypes.c_void_p]), | |
| # cudaError_t cudaMemset ( void* devPtr, int value, size_t count ) | |
| Function( | |
| "cudaMemset", cudaError_t, [ctypes.c_void_p, ctypes.c_int, ctypes.c_size_t] | |
| ), | |
| # cudaError_t cudaMemcpy ( void* dst, const void* src, size_t count, cudaMemcpyKind kind ) # noqa | |
| Function( | |
| "cudaMemcpy", | |
| cudaError_t, | |
| [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, cudaMemcpyKind], | |
| ), | |
| # cudaError_t cudaIpcGetMemHandle ( cudaIpcMemHandle_t* handle, void* devPtr ) # noqa | |
| Function( | |
| "cudaIpcGetMemHandle", | |
| cudaError_t, | |
| [ctypes.POINTER(cudaIpcMemHandle_t), ctypes.c_void_p], | |
| ), | |
| # cudaError_t cudaIpcOpenMemHandle ( void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags ) # noqa | |
| Function( | |
| "cudaIpcOpenMemHandle", | |
| cudaError_t, | |
| [ctypes.POINTER(ctypes.c_void_p), cudaIpcMemHandle_t, ctypes.c_uint], | |
| ), | |
| ] | |
| # class attribute to store the mapping from the path to the library | |
| # to avoid loading the same library multiple times | |
| path_to_library_cache: Dict[str, Any] = {} | |
| # class attribute to store the mapping from library path | |
| # to the corresponding dictionary | |
| path_to_dict_mapping: Dict[str, Dict[str, Any]] = {} | |
| def __init__(self, so_file: Optional[str] = None): | |
| if so_file is None: | |
| so_file = find_loaded_library("libcudart") | |
| assert so_file is not None, "libcudart is not loaded in the current process" | |
| if so_file not in CudaRTLibrary.path_to_library_cache: | |
| lib = ctypes.CDLL(so_file) | |
| CudaRTLibrary.path_to_library_cache[so_file] = lib | |
| self.lib = CudaRTLibrary.path_to_library_cache[so_file] | |
| if so_file not in CudaRTLibrary.path_to_dict_mapping: | |
| _funcs = {} | |
| for func in CudaRTLibrary.exported_functions: | |
| f = getattr(self.lib, func.name) | |
| f.restype = func.restype | |
| f.argtypes = func.argtypes | |
| _funcs[func.name] = f | |
| CudaRTLibrary.path_to_dict_mapping[so_file] = _funcs | |
| self.funcs = CudaRTLibrary.path_to_dict_mapping[so_file] | |
| def CUDART_CHECK(self, result: cudaError_t) -> None: | |
| if result != 0: | |
| error_str = self.cudaGetErrorString(result) | |
| raise RuntimeError(f"CUDART error: {error_str}") | |
| def cudaGetErrorString(self, error: cudaError_t) -> str: | |
| return self.funcs["cudaGetErrorString"](error).decode("utf-8") | |
| def cudaSetDevice(self, device: int) -> None: | |
| self.CUDART_CHECK(self.funcs["cudaSetDevice"](device)) | |
| def cudaDeviceSynchronize(self) -> None: | |
| self.CUDART_CHECK(self.funcs["cudaDeviceSynchronize"]()) | |
| def cudaDeviceReset(self) -> None: | |
| self.CUDART_CHECK(self.funcs["cudaDeviceReset"]()) | |
| def cudaMalloc(self, size: int) -> ctypes.c_void_p: | |
| devPtr = ctypes.c_void_p() | |
| self.CUDART_CHECK(self.funcs["cudaMalloc"](ctypes.byref(devPtr), size)) | |
| return devPtr | |
| def cudaFree(self, devPtr: ctypes.c_void_p) -> None: | |
| self.CUDART_CHECK(self.funcs["cudaFree"](devPtr)) | |
| def cudaMemset(self, devPtr: ctypes.c_void_p, value: int, count: int) -> None: | |
| self.CUDART_CHECK(self.funcs["cudaMemset"](devPtr, value, count)) | |
| def cudaMemcpy( | |
| self, dst: ctypes.c_void_p, src: ctypes.c_void_p, count: int | |
| ) -> None: | |
| cudaMemcpyDefault = 4 | |
| kind = cudaMemcpyDefault | |
| self.CUDART_CHECK(self.funcs["cudaMemcpy"](dst, src, count, kind)) | |
| def cudaIpcGetMemHandle(self, devPtr: ctypes.c_void_p) -> cudaIpcMemHandle_t: | |
| handle = cudaIpcMemHandle_t() | |
| self.CUDART_CHECK( | |
| self.funcs["cudaIpcGetMemHandle"](ctypes.byref(handle), devPtr) | |
| ) | |
| return handle | |
| def cudaIpcOpenMemHandle(self, handle: cudaIpcMemHandle_t) -> ctypes.c_void_p: | |
| cudaIpcMemLazyEnablePeerAccess = 1 | |
| devPtr = ctypes.c_void_p() | |
| self.CUDART_CHECK( | |
| self.funcs["cudaIpcOpenMemHandle"]( | |
| ctypes.byref(devPtr), handle, cudaIpcMemLazyEnablePeerAccess | |
| ) | |
| ) | |
| return devPtr | |
Xet Storage Details
- Size:
- 7.04 kB
- Xet hash:
- 72851599de200e399b96d411bde46a634017c1410ef53957848fbf1abbcb2a27
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.