File size: 11,243 Bytes
69e1a8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import functools
import json
import struct
import subprocess
import sys
from pathlib import Path
from typing import TYPE_CHECKING

from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
from cuda.pathfinder._dynamic_libs.load_dl_common import (
    DynamicLibNotAvailableError,
    DynamicLibNotFoundError,
    DynamicLibUnknownError,
    LoadedDL,
    load_dependencies,
)
from cuda.pathfinder._dynamic_libs.platform_loader import LOADER
from cuda.pathfinder._dynamic_libs.search_steps import (
    EARLY_FIND_STEPS,
    LATE_FIND_STEPS,
    SearchContext,
    derive_ctk_root,
    find_via_ctk_root,
    run_find_steps,
)
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

if TYPE_CHECKING:
    from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor

# Every libname that load_nvidia_dynamic_lib recognizes, regardless of
# category (CTK, third-party, driver).
_ALL_KNOWN_LIBNAMES: frozenset[str] = frozenset(LIB_DESCRIPTORS)
# Subset of the known libnames whose descriptor has a non-empty entry for the
# current platform (``windows_dlls`` on Windows, ``linux_sonames`` otherwise).
_ALL_SUPPORTED_LIBNAMES: frozenset[str] = frozenset(
    libname
    for libname, descriptor in LIB_DESCRIPTORS.items()
    if (descriptor.windows_dlls if IS_WINDOWS else descriptor.linux_sonames)
)
# Platform label interpolated into the "not available on ..." error message.
_PLATFORM_NAME = "Windows" if IS_WINDOWS else "Linux"
# Canary probe settings: the module run via ``python -m`` in a child process,
# the time limit for that child, and the working directory it is started in.
_CANARY_PROBE_MODULE = "cuda.pathfinder._dynamic_libs.canary_probe_subprocess"
_CANARY_PROBE_TIMEOUT_SECONDS = 10.0
# parents[3] of this file's resolved path -- presumably the directory holding
# the top-level ``cuda`` package, so that ``-m`` can import the probe module.
_CANARY_PROBE_IMPORT_ROOT = Path(__file__).resolve().parents[3]

# Driver libraries are shipped with the NVIDIA display driver and are always
# on the system linker path.  They bypass every CTK search step
# (site-packages, conda, CUDA_HOME, canary) and go straight to system search.
_DRIVER_ONLY_LIBNAMES = frozenset(
    libname for libname, descriptor in LIB_DESCRIPTORS.items() if descriptor.packaged_with == "driver"
)


def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
    """Load an NVIDIA driver library using system search only.

    Driver libraries (libcuda, libnvidia-ml) belong to the display driver
    rather than the CUDA Toolkit and are always on the system linker path,
    so the CTK search cascade (site-packages, conda, CUDA_HOME, canary) is
    not needed for them.

    Args:
        desc: Descriptor of the driver library to load.

    Returns:
        The ``LoadedDL`` from the already-loaded check if there is one,
        otherwise the one produced by the system search.

    Raises:
        DynamicLibNotFoundError: If neither step yields a library.
    """
    # ``False``: no on-disk path was found beforehand (none is searched for).
    already_loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, False)
    if already_loaded is not None:
        return already_loaded

    system_loaded = LOADER.load_with_system_search(desc)
    if system_loaded is None:
        raise DynamicLibNotFoundError(
            f'"{desc.name}" is an NVIDIA driver library and can only be found via'
            f" system search. Ensure the NVIDIA display driver is installed."
        )
    return system_loaded


def _coerce_subprocess_output(output: str | bytes | None) -> str:
    if isinstance(output, bytes):
        return output.decode(errors="replace")
    return "" if output is None else output


def _raise_canary_probe_child_process_error(
    *,
    returncode: int | None = None,
    timeout: float | None = None,
    stderr: str | bytes | None = None,
) -> None:
    """Raise ``ChildProcessError`` for a failed canary probe child process.

    This function always raises; it never returns normally.

    Args:
        returncode: Exit code of the child; reported when ``timeout`` is ``None``.
        timeout: Timeout in seconds; when given, the error reports a timeout
            instead of an exit code.
        stderr: Captured stderr of the child, embedded in the error message.

    Raises:
        ChildProcessError: Always.
    """
    if timeout is not None:
        headline = f"Canary probe child process timed out after {timeout} seconds."
    else:
        headline = f"Canary probe child process exited with code {returncode}."
    stderr_text = _coerce_subprocess_output(stderr)
    message = (
        f"{headline}\n"
        "--- stderr-from-child-process ---\n"
        f"{stderr_text}"
        "<end-of-stderr-from-child-process>\n"
    )
    raise ChildProcessError(message)


@functools.cache
def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
    """Ask a fresh Python subprocess for the absolute path of a canary library.

    The probe module is run with the current interpreter and is expected to
    print a JSON payload as its last non-empty stdout line: a string (the
    absolute path) or ``null``.

    Args:
        libname: Name of the canary library, passed to the probe module.

    Returns:
        The path reported by the probe, or ``None`` if it reported ``null``.
        Return values (including ``None``) are memoized per ``libname``.

    Raises:
        ChildProcessError: If the child times out or exits with a non-zero code.
        RuntimeError: If the child prints no payload, invalid JSON, or a
            payload that is neither a string nor ``null``.
    """
    probe_argv = [sys.executable, "-m", _CANARY_PROBE_MODULE, libname]
    try:
        completed = subprocess.run(  # noqa: S603 - trusted argv: current interpreter + internal probe module
            probe_argv,
            capture_output=True,
            text=True,
            timeout=_CANARY_PROBE_TIMEOUT_SECONDS,
            check=False,
            cwd=_CANARY_PROBE_IMPORT_ROOT,
        )
    except subprocess.TimeoutExpired as exc:
        _raise_canary_probe_child_process_error(timeout=exc.timeout, stderr=exc.stderr)

    if completed.returncode != 0:
        _raise_canary_probe_child_process_error(returncode=completed.returncode, stderr=completed.stderr)

    # Only the last non-empty line counts, in case earlier lines were emitted.
    non_empty_lines = [text for text in completed.stdout.splitlines() if text.strip()]
    if not non_empty_lines:
        raise RuntimeError(f"Canary probe child process produced no stdout payload for {libname!r}")
    last_line = non_empty_lines[-1]
    try:
        payload = json.loads(last_line)
    except json.JSONDecodeError:
        raise RuntimeError(
            f"Canary probe child process emitted invalid JSON payload for {libname!r}: {last_line!r}"
        ) from None
    if payload is None or isinstance(payload, str):
        return payload
    raise RuntimeError(f"Canary probe child process emitted unexpected payload for {libname!r}: {payload!r}")


def _try_ctk_root_canary(ctx: SearchContext) -> str | None:
    """Find the target library via a CTK root derived from a canary library.

    Each canary anchor libname configured on ``ctx.desc`` is tried in order:
    its absolute path is resolved in a subprocess, a CTK root is derived from
    that path, and the target library is searched for under that root.

    Args:
        ctx: Search context of the library being located.

    Returns:
        The absolute path (as ``str``) from the first anchor that leads to a
        hit, or ``None`` if no anchor does.
    """
    for anchor_libname in ctx.desc.ctk_root_canary_anchor_libnames:
        anchor_abs_path = _resolve_system_loaded_abs_path_in_subprocess(anchor_libname)
        # Each stage runs only if the previous one produced a value.
        ctk_root = None if anchor_abs_path is None else derive_ctk_root(anchor_abs_path)
        found = None if ctk_root is None else find_via_ctk_root(ctx, ctk_root)
        if found is not None:
            return str(found.abs_path)
    return None


def _load_lib_no_cache(libname: str) -> LoadedDL:
    """Locate and load ``libname``, running the full search cascade.

    Driver-only libraries are delegated to ``_load_driver_lib_no_cache``.
    For every other library the phases below run in order, and the first
    step that yields a loaded library determines the result.

    Args:
        libname: Key into ``LIB_DESCRIPTORS``.  It is not validated here;
            ``load_nvidia_dynamic_lib`` checks it before calling.

    Returns:
        The ``LoadedDL`` produced by the first successful step.

    Raises:
        Whatever ``ctx.raise_not_found()`` raises when every step fails --
        presumably ``DynamicLibNotFoundError``; confirm in ``SearchContext``.
    """
    desc = LIB_DESCRIPTORS[libname]

    # Driver libraries skip the CTK search steps entirely.
    if libname in _DRIVER_ONLY_LIBNAMES:
        return _load_driver_lib_no_cache(desc)

    ctx = SearchContext(desc)

    # Phase 1: Try to find the library file on disk (pip wheels, conda).
    find = run_find_steps(ctx, EARLY_FIND_STEPS)

    # Phase 2: Cross-cutting — already-loaded check and dependency loading.
    # The already-loaded check on Windows uses the "have we found a path?"
    # flag to decide whether to apply AddDllDirectory side-effects.
    loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, find is not None)
    # Dependencies are loaded before the early return below, i.e. even when
    # the library itself is already loaded.  The public, cached entry point
    # is passed in as the loader callback.
    load_dependencies(desc, load_nvidia_dynamic_lib)
    if loaded is not None:
        return loaded

    # Phase 3: Load from found path, or fall back to system search + late find.
    if find is not None:
        return LOADER.load_with_abs_path(desc, find.abs_path, find.found_via)

    loaded = LOADER.load_with_system_search(desc)
    if loaded is not None:
        return loaded

    find = run_find_steps(ctx, LATE_FIND_STEPS)
    if find is not None:
        return LOADER.load_with_abs_path(desc, find.abs_path, find.found_via)

    # Last resort, only for descriptors that configure canary anchors: derive
    # the CTK root from a canary library and search relative to that root.
    if desc.ctk_root_canary_anchor_libnames:
        canary_abs_path = _try_ctk_root_canary(ctx)
        if canary_abs_path is not None:
            return LOADER.load_with_abs_path(desc, canary_abs_path, "system-ctk-root")

    # NOTE(review): expected to raise unconditionally; if it ever returned,
    # this function would implicitly return None despite its annotation.
    ctx.raise_not_found()


@functools.cache
def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
    """Locate and load an NVIDIA dynamic library, given its short name.

    Args:
        libname (str): Short library name, for example ``"cudart"`` or
            ``"nvvm"``.

    Returns:
        LoadedDL: Holds the OS library handle and the absolute path of the
        loaded library.

        **Important:**

        **Do not close the returned handle.** Never pass
        ``LoadedDL._handle_uint`` to ``dlclose`` (Linux) or ``FreeLibrary``
        (Windows).

        **Why:** the result is memoized (``functools.cache``) and shared by all
        callers in the process. Closing the handle may unload the module while
        other code still relies on it, causing crashes or subtle failures.

        This holds on both Linux and Windows. For background, see issue #1011:
        https://github.com/NVIDIA/cuda-python/issues/1011

    Raises:
        DynamicLibUnknownError: If ``libname`` is not a recognized library name.
        DynamicLibNotAvailableError: If ``libname`` is recognized but not
            supported on this platform.
        DynamicLibNotFoundError: If the library cannot be found or loaded.
        RuntimeError: If Python is not 64-bit.

    Search order:
        0. **Already loaded in the current process**

           - When some other component has already loaded a matching library,
             its absolute path and handle are returned and the remaining
             steps are skipped.

        1. **NVIDIA Python wheels**

           - Installed distributions (``site-packages``) are scanned for
             libraries shipped in NVIDIA wheels.

        2. **Conda environment**

           - Conda installations are located through ``CONDA_PREFIX``, which
             is set automatically in activated conda environments (see
             https://docs.conda.io/projects/conda-build/en/stable/user-guide/environment-variables.html).

        3. **OS default mechanisms**

           - The native loader is used as a fallback:

             - Linux: ``dlopen()``

             - Windows: ``LoadLibraryW()``

           - System installs of the CUDA Toolkit (CTK) that update the system
             configuration are often picked up via:

             - Linux: ``/etc/ld.so.conf.d/*cuda*.conf``

             - Windows: ``C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\vX.Y\\bin``
               on the system ``PATH``.

        4. **Environment variables**

           - ``CUDA_HOME`` or ``CUDA_PATH`` (in that order) is used if set.

        5. **CTK root canary probe (discoverable libs only)**

           - For selected libraries whose shared object is not on the
             standard linker path (currently ``nvvm``), the CTK root is
             derived by system-loading a well-known CTK canary library in a
             subprocess; the library is then searched for relative to that
             root.

    **Driver libraries** (``"cuda"``, ``"nvml"``):

        These belong to the NVIDIA display driver (not the CUDA Toolkit) and
        are always on the system linker path.  Their search is reduced to:

        0. Already loaded in the current process
        1. OS default mechanisms (``dlopen`` / ``LoadLibraryW``)

        The CTK-specific steps (site-packages, conda, ``CUDA_HOME``, canary
        probe) are skipped entirely.

    Notes:
        The search runs **per library**. There is currently no mechanism that
        guarantees several libraries are all resolved from the same location.

    """
    # struct format "P" is a native pointer; its size gives the interpreter bitness.
    pointer_bits = 8 * struct.calcsize("P")
    if pointer_bits != 64:
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
        raise RuntimeError(
            "cuda.pathfinder.load_nvidia_dynamic_lib() requires 64-bit Python."
            f" Currently running: {pointer_bits}-bit Python {python_version}"
        )

    if libname not in _ALL_KNOWN_LIBNAMES:
        known_names = sorted(_ALL_KNOWN_LIBNAMES)
        raise DynamicLibUnknownError(f"Unknown library name: {libname!r}. Known names: {known_names}")

    if libname not in _ALL_SUPPORTED_LIBNAMES:
        supported_names = sorted(_ALL_SUPPORTED_LIBNAMES)
        raise DynamicLibNotAvailableError(
            f"Library name {libname!r} is known but not available on {_PLATFORM_NAME}. "
            f"Supported names on {_PLATFORM_NAME}: {supported_names}"
        )

    return _load_lib_no_cache(libname)