Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/common_functions.h +65 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_bf16.h +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_bf16.hpp +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_egl_interop.h +645 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_fp16.h +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_fp8.h +475 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_occupancy.h +2094 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_pipeline_primitives.h +148 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti.h +123 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_events.h +1349 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_pcsampling.h +936 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_runtime_cbid.h +504 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/device_functions.h +65 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/library_types.h +111 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/nvperf_cuda_host.h +179 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_32_intrinsics.hpp +588 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_60_atomic_functions.h +330 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_61_intrinsics.hpp +161 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/convert.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/pack.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/tags.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/unpack.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/LICENSE +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/LICENSE.APACHE +177 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/LICENSE.BSD +23 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__init__.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_elffile.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_manylinux.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_musllinux.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_parser.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_structures.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_tokenizer.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/markers.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/requirements.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/specifiers.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/tags.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/utils.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/version.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_elffile.py +108 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_musllinux.py +83 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_parser.py +356 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_structures.py +61 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_tokenizer.py +192 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/markers.py +253 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/specifiers.py +1011 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/tags.py +571 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/utils.py +172 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/common_functions.h
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2018 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__)
|
| 51 |
+
#if defined(_MSC_VER)
|
| 52 |
+
#pragma message("common_functions.h is an internal header file and must not be used directly. This file will be removed in a future CUDA release. Please use cuda_runtime_api.h or cuda_runtime.h instead.")
|
| 53 |
+
#else
|
| 54 |
+
#warning "common_functions.h is an internal header file and must not be used directly. This file will be removed in a future CUDA release. Please use cuda_runtime_api.h or cuda_runtime.h instead."
|
| 55 |
+
#endif
|
| 56 |
+
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
| 57 |
+
#define __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_COMMON_FUNCTIONS_H_WRAPPER__
|
| 58 |
+
#endif
|
| 59 |
+
|
| 60 |
+
#include "crt/common_functions.h"
|
| 61 |
+
|
| 62 |
+
#if defined(__UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_COMMON_FUNCTIONS_H_WRAPPER__)
|
| 63 |
+
#undef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
| 64 |
+
#undef __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_COMMON_FUNCTIONS_H_WRAPPER__
|
| 65 |
+
#endif
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_bf16.h
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_bf16.hpp
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_egl_interop.h
ADDED
|
@@ -0,0 +1,645 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(__CUDA_EGL_INTEROP_H__)
|
| 51 |
+
#define __CUDA_EGL_INTEROP_H__
|
| 52 |
+
|
| 53 |
+
#include "cuda_runtime_api.h"
|
| 54 |
+
#include "cuda_runtime.h"
|
| 55 |
+
#include "cudart_platform.h"
|
| 56 |
+
#include "EGL/egl.h"
|
| 57 |
+
#include "EGL/eglext.h"
|
| 58 |
+
|
| 59 |
+
#if defined(__cplusplus)
|
| 60 |
+
extern "C" {
|
| 61 |
+
#endif /* __cplusplus */
|
| 62 |
+
|
| 63 |
+
/**
|
| 64 |
+
* \addtogroup CUDART_TYPES
|
| 65 |
+
* @{
|
| 66 |
+
*/
|
| 67 |
+
|
| 68 |
+
/**
|
| 69 |
+
* Maximum number of planes per frame
|
| 70 |
+
*/
|
| 71 |
+
#define CUDA_EGL_MAX_PLANES 3
|
| 72 |
+
|
| 73 |
+
/**
|
| 74 |
+
* CUDA EglFrame type - array or pointer
|
| 75 |
+
*/
|
| 76 |
+
typedef enum cudaEglFrameType_enum
|
| 77 |
+
{
|
| 78 |
+
cudaEglFrameTypeArray = 0, /**< Frame type CUDA array */
|
| 79 |
+
cudaEglFrameTypePitch = 1, /**< Frame type CUDA pointer */
|
| 80 |
+
} cudaEglFrameType;
|
| 81 |
+
|
| 82 |
+
/**
|
| 83 |
+
* Resource location flags- sysmem or vidmem
|
| 84 |
+
*
|
| 85 |
+
* For CUDA context on iGPU, since video and system memory are equivalent -
|
| 86 |
+
* these flags will not have an effect on the execution.
|
| 87 |
+
*
|
| 88 |
+
* For CUDA context on dGPU, applications can use the flag ::cudaEglResourceLocationFlags
|
| 89 |
+
* to give a hint about the desired location.
|
| 90 |
+
*
|
| 91 |
+
* ::cudaEglResourceLocationSysmem - the frame data is made resident on the system memory
|
| 92 |
+
* to be accessed by CUDA.
|
| 93 |
+
*
|
| 94 |
+
* ::cudaEglResourceLocationVidmem - the frame data is made resident on the dedicated
|
| 95 |
+
* video memory to be accessed by CUDA.
|
| 96 |
+
*
|
| 97 |
+
* There may be an additional latency due to new allocation and data migration,
|
| 98 |
+
* if the frame is produced on a different memory.
|
| 99 |
+
*/
|
| 100 |
+
typedef enum cudaEglResourceLocationFlags_enum {
|
| 101 |
+
cudaEglResourceLocationSysmem = 0x00, /**< Resource location sysmem */
|
| 102 |
+
cudaEglResourceLocationVidmem = 0x01, /**< Resource location vidmem */
|
| 103 |
+
} cudaEglResourceLocationFlags;
|
| 104 |
+
|
| 105 |
+
/**
|
| 106 |
+
* CUDA EGL Color Format - The different planar and multiplanar formats currently supported for CUDA_EGL interops.
|
| 107 |
+
*/
|
| 108 |
+
typedef enum cudaEglColorFormat_enum {
|
| 109 |
+
cudaEglColorFormatYUV420Planar = 0, /**< Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 110 |
+
cudaEglColorFormatYUV420SemiPlanar = 1, /**< Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV420Planar. */
|
| 111 |
+
cudaEglColorFormatYUV422Planar = 2, /**< Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 112 |
+
cudaEglColorFormatYUV422SemiPlanar = 3, /**< Y, UV in two surfaces with VU byte ordering, width, height ratio same as YUV422Planar. */
|
| 113 |
+
cudaEglColorFormatARGB = 6, /**< R/G/B/A four channels in one surface with BGRA byte ordering. */
|
| 114 |
+
cudaEglColorFormatRGBA = 7, /**< R/G/B/A four channels in one surface with ABGR byte ordering. */
|
| 115 |
+
cudaEglColorFormatL = 8, /**< single luminance channel in one surface. */
|
| 116 |
+
cudaEglColorFormatR = 9, /**< single color channel in one surface. */
|
| 117 |
+
cudaEglColorFormatYUV444Planar = 10, /**< Y, U, V in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height. */
|
| 118 |
+
cudaEglColorFormatYUV444SemiPlanar = 11, /**< Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV444Planar. */
|
| 119 |
+
cudaEglColorFormatYUYV422 = 12, /**< Y, U, V in one surface, interleaved as UYVY in one channel. */
|
| 120 |
+
cudaEglColorFormatUYVY422 = 13, /**< Y, U, V in one surface, interleaved as YUYV in one channel. */
|
| 121 |
+
cudaEglColorFormatABGR = 14, /**< R/G/B/A four channels in one surface with RGBA byte ordering. */
|
| 122 |
+
cudaEglColorFormatBGRA = 15, /**< R/G/B/A four channels in one surface with ARGB byte ordering. */
|
| 123 |
+
cudaEglColorFormatA = 16, /**< Alpha color format - one channel in one surface. */
|
| 124 |
+
cudaEglColorFormatRG = 17, /**< R/G color format - two channels in one surface with GR byte ordering */
|
| 125 |
+
cudaEglColorFormatAYUV = 18, /**< Y, U, V, A four channels in one surface, interleaved as VUYA. */
|
| 126 |
+
cudaEglColorFormatYVU444SemiPlanar = 19, /**< Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height. */
|
| 127 |
+
cudaEglColorFormatYVU422SemiPlanar = 20, /**< Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 128 |
+
cudaEglColorFormatYVU420SemiPlanar = 21, /**< Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 129 |
+
cudaEglColorFormatY10V10U10_444SemiPlanar = 22, /**< Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height. */
|
| 130 |
+
cudaEglColorFormatY10V10U10_420SemiPlanar = 23, /**< Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 131 |
+
cudaEglColorFormatY12V12U12_444SemiPlanar = 24, /**< Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height. */
|
| 132 |
+
cudaEglColorFormatY12V12U12_420SemiPlanar = 25, /**< Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 133 |
+
cudaEglColorFormatVYUY_ER = 26, /**< Extended Range Y, U, V in one surface, interleaved as YVYU in one channel. */
|
| 134 |
+
cudaEglColorFormatUYVY_ER = 27, /**< Extended Range Y, U, V in one surface, interleaved as YUYV in one channel. */
|
| 135 |
+
cudaEglColorFormatYUYV_ER = 28, /**< Extended Range Y, U, V in one surface, interleaved as UYVY in one channel. */
|
| 136 |
+
cudaEglColorFormatYVYU_ER = 29, /**< Extended Range Y, U, V in one surface, interleaved as VYUY in one channel. */
|
| 137 |
+
cudaEglColorFormatYUVA_ER = 31, /**< Extended Range Y, U, V, A four channels in one surface, interleaved as AVUY. */
|
| 138 |
+
cudaEglColorFormatAYUV_ER = 32, /**< Extended Range Y, U, V, A four channels in one surface, interleaved as VUYA. */
|
| 139 |
+
cudaEglColorFormatYUV444Planar_ER = 33, /**< Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V height = Y height. */
|
| 140 |
+
cudaEglColorFormatYUV422Planar_ER = 34, /**< Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 141 |
+
cudaEglColorFormatYUV420Planar_ER = 35, /**< Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 142 |
+
cudaEglColorFormatYUV444SemiPlanar_ER = 36, /**< Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = Y width, U/V height = Y height. */
|
| 143 |
+
cudaEglColorFormatYUV422SemiPlanar_ER = 37, /**< Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 144 |
+
cudaEglColorFormatYUV420SemiPlanar_ER = 38, /**< Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 145 |
+
cudaEglColorFormatYVU444Planar_ER = 39, /**< Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V height = Y height. */
|
| 146 |
+
cudaEglColorFormatYVU422Planar_ER = 40, /**< Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 147 |
+
cudaEglColorFormatYVU420Planar_ER = 41, /**< Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 148 |
+
cudaEglColorFormatYVU444SemiPlanar_ER = 42, /**< Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height. */
|
| 149 |
+
cudaEglColorFormatYVU422SemiPlanar_ER = 43, /**< Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 150 |
+
cudaEglColorFormatYVU420SemiPlanar_ER = 44, /**< Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 151 |
+
cudaEglColorFormatBayerRGGB = 45, /**< Bayer format - one channel in one surface with interleaved RGGB ordering. */
|
| 152 |
+
cudaEglColorFormatBayerBGGR = 46, /**< Bayer format - one channel in one surface with interleaved BGGR ordering. */
|
| 153 |
+
cudaEglColorFormatBayerGRBG = 47, /**< Bayer format - one channel in one surface with interleaved GRBG ordering. */
|
| 154 |
+
cudaEglColorFormatBayerGBRG = 48, /**< Bayer format - one channel in one surface with interleaved GBRG ordering. */
|
| 155 |
+
cudaEglColorFormatBayer10RGGB = 49, /**< Bayer10 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 10 bits used 6 bits No-op. */
|
| 156 |
+
cudaEglColorFormatBayer10BGGR = 50, /**< Bayer10 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 10 bits used 6 bits No-op. */
|
| 157 |
+
cudaEglColorFormatBayer10GRBG = 51, /**< Bayer10 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 10 bits used 6 bits No-op. */
|
| 158 |
+
cudaEglColorFormatBayer10GBRG = 52, /**< Bayer10 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 10 bits used 6 bits No-op. */
|
| 159 |
+
cudaEglColorFormatBayer12RGGB = 53, /**< Bayer12 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 160 |
+
cudaEglColorFormatBayer12BGGR = 54, /**< Bayer12 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 161 |
+
cudaEglColorFormatBayer12GRBG = 55, /**< Bayer12 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 162 |
+
cudaEglColorFormatBayer12GBRG = 56, /**< Bayer12 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 163 |
+
cudaEglColorFormatBayer14RGGB = 57, /**< Bayer14 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 14 bits used 2 bits No-op. */
|
| 164 |
+
cudaEglColorFormatBayer14BGGR = 58, /**< Bayer14 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 14 bits used 2 bits No-op. */
|
| 165 |
+
cudaEglColorFormatBayer14GRBG = 59, /**< Bayer14 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 14 bits used 2 bits No-op. */
|
| 166 |
+
cudaEglColorFormatBayer14GBRG = 60, /**< Bayer14 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 14 bits used 2 bits No-op. */
|
| 167 |
+
cudaEglColorFormatBayer20RGGB = 61, /**< Bayer20 format - one channel in one surface with interleaved RGGB ordering. Out of 32 bits, 20 bits used 12 bits No-op. */
|
| 168 |
+
cudaEglColorFormatBayer20BGGR = 62, /**< Bayer20 format - one channel in one surface with interleaved BGGR ordering. Out of 32 bits, 20 bits used 12 bits No-op. */
|
| 169 |
+
cudaEglColorFormatBayer20GRBG = 63, /**< Bayer20 format - one channel in one surface with interleaved GRBG ordering. Out of 32 bits, 20 bits used 12 bits No-op. */
|
| 170 |
+
cudaEglColorFormatBayer20GBRG = 64, /**< Bayer20 format - one channel in one surface with interleaved GBRG ordering. Out of 32 bits, 20 bits used 12 bits No-op. */
|
| 171 |
+
cudaEglColorFormatYVU444Planar = 65, /**< Y, V, U in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height. */
|
| 172 |
+
cudaEglColorFormatYVU422Planar = 66, /**< Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height. */
|
| 173 |
+
cudaEglColorFormatYVU420Planar = 67, /**< Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 174 |
+
cudaEglColorFormatBayerIspRGGB = 68, /**< Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved RGGB ordering and mapped to opaque integer datatype. */
|
| 175 |
+
cudaEglColorFormatBayerIspBGGR = 69, /**< Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved BGGR ordering and mapped to opaque integer datatype. */
|
| 176 |
+
cudaEglColorFormatBayerIspGRBG = 70, /**< Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GRBG ordering and mapped to opaque integer datatype. */
|
| 177 |
+
cudaEglColorFormatBayerIspGBRG = 71, /**< Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GBRG ordering and mapped to opaque integer datatype. */
|
| 178 |
+
cudaEglColorFormatBayerBCCR = 72, /**< Bayer format - one channel in one surface with interleaved BCCR ordering. */
|
| 179 |
+
cudaEglColorFormatBayerRCCB = 73, /**< Bayer format - one channel in one surface with interleaved RCCB ordering. */
|
| 180 |
+
cudaEglColorFormatBayerCRBC = 74, /**< Bayer format - one channel in one surface with interleaved CRBC ordering. */
|
| 181 |
+
cudaEglColorFormatBayerCBRC = 75, /**< Bayer format - one channel in one surface with interleaved CBRC ordering. */
|
| 182 |
+
cudaEglColorFormatBayer10CCCC = 76, /**< Bayer10 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 10 bits used 6 bits No-op. */
|
| 183 |
+
cudaEglColorFormatBayer12BCCR = 77, /**< Bayer12 format - one channel in one surface with interleaved BCCR ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 184 |
+
cudaEglColorFormatBayer12RCCB = 78, /**< Bayer12 format - one channel in one surface with interleaved RCCB ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 185 |
+
cudaEglColorFormatBayer12CRBC = 79, /**< Bayer12 format - one channel in one surface with interleaved CRBC ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 186 |
+
cudaEglColorFormatBayer12CBRC = 80, /**< Bayer12 format - one channel in one surface with interleaved CBRC ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 187 |
+
cudaEglColorFormatBayer12CCCC = 81, /**< Bayer12 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 12 bits used 4 bits No-op. */
|
| 188 |
+
cudaEglColorFormatY = 82, /**< Color format for single Y plane. */
|
| 189 |
+
cudaEglColorFormatYUV420SemiPlanar_2020 = 83, /**< Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 190 |
+
cudaEglColorFormatYVU420SemiPlanar_2020 = 84, /**< Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 191 |
+
cudaEglColorFormatYUV420Planar_2020 = 85, /**< Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 192 |
+
cudaEglColorFormatYVU420Planar_2020 = 86, /**< Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 193 |
+
cudaEglColorFormatYUV420SemiPlanar_709 = 87, /**< Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 194 |
+
cudaEglColorFormatYVU420SemiPlanar_709 = 88, /**< Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 195 |
+
cudaEglColorFormatYUV420Planar_709 = 89, /**< Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 196 |
+
cudaEglColorFormatYVU420Planar_709 = 90, /**< Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 197 |
+
cudaEglColorFormatY10V10U10_420SemiPlanar_709 = 91, /**< Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 198 |
+
cudaEglColorFormatY10V10U10_420SemiPlanar_2020 = 92, /**< Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 199 |
+
cudaEglColorFormatY10V10U10_422SemiPlanar_2020 = 93, /**< Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height. */
|
| 200 |
+
cudaEglColorFormatY10V10U10_422SemiPlanar = 94, /**< Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height. */
|
| 201 |
+
cudaEglColorFormatY10V10U10_422SemiPlanar_709 = 95, /**< Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height. */
|
| 202 |
+
cudaEglColorFormatY_ER = 96, /**< Extended Range Color format for single Y plane. */
|
| 203 |
+
cudaEglColorFormatY_709_ER = 97, /**< Extended Range Color format for single Y plane. */
|
| 204 |
+
cudaEglColorFormatY10_ER = 98, /**< Extended Range Color format for single Y10 plane. */
|
| 205 |
+
cudaEglColorFormatY10_709_ER = 99, /**< Extended Range Color format for single Y10 plane. */
|
| 206 |
+
cudaEglColorFormatY12_ER = 100, /**< Extended Range Color format for single Y12 plane. */
|
| 207 |
+
cudaEglColorFormatY12_709_ER = 101, /**< Extended Range Color format for single Y12 plane. */
|
| 208 |
+
cudaEglColorFormatYUVA = 102, /**< Y, U, V, A four channels in one surface, interleaved as AVUY. */
|
| 209 |
+
cudaEglColorFormatYVYU = 104, /**< Y, U, V in one surface, interleaved as YVYU in one channel. */
|
| 210 |
+
cudaEglColorFormatVYUY = 105, /**< Y, U, V in one surface, interleaved as VYUY in one channel. */
|
| 211 |
+
cudaEglColorFormatY10V10U10_420SemiPlanar_ER = 106, /**< Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 212 |
+
cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER = 107, /**< Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 213 |
+
cudaEglColorFormatY10V10U10_444SemiPlanar_ER = 108, /**< Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height. */
|
| 214 |
+
cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER = 109, /**< Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height. */
|
| 215 |
+
cudaEglColorFormatY12V12U12_420SemiPlanar_ER = 110, /**< Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 216 |
+
cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER = 111, /**< Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height. */
|
| 217 |
+
cudaEglColorFormatY12V12U12_444SemiPlanar_ER = 112, /**< Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height. */
|
| 218 |
+
cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER = 113, /**< Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height. */
|
| 219 |
+
cudaEglColorFormatUYVY709 = 114, /**< Y, U, V in one surface, interleaved as UYVY in one channel. */
|
| 220 |
+
cudaEglColorFormatUYVY709_ER = 115, /**< Extended Range Y, U, V in one surface, interleaved as UYVY in one channel. */
|
| 221 |
+
cudaEglColorFormatUYVY2020 = 116, /**< Y, U, V in one surface, interleaved as UYVY in one channel. */
|
| 222 |
+
} cudaEglColorFormat;
|
| 223 |
+
|
| 224 |
+
/**
|
| 225 |
+
* CUDA EGL Plane Descriptor - structure defining each plane of a CUDA EGLFrame
|
| 226 |
+
*/
|
| 227 |
+
typedef struct cudaEglPlaneDesc_st {
|
| 228 |
+
unsigned int width; /**< Width of plane */
|
| 229 |
+
unsigned int height; /**< Height of plane */
|
| 230 |
+
unsigned int depth; /**< Depth of plane */
|
| 231 |
+
unsigned int pitch; /**< Pitch of plane */
|
| 232 |
+
unsigned int numChannels; /**< Number of channels for the plane */
|
| 233 |
+
struct cudaChannelFormatDesc channelDesc; /**< Channel Format Descriptor */
|
| 234 |
+
unsigned int reserved[4]; /**< Reserved for future use */
|
| 235 |
+
} cudaEglPlaneDesc;
|
| 236 |
+
|
| 237 |
+
/**
|
| 238 |
+
* CUDA EGLFrame Descriptor - structure defining one frame of EGL.
|
| 239 |
+
*
|
| 240 |
+
* Each frame may contain one or more planes depending on whether the surface is Multiplanar or not.
|
| 241 |
+
* Each plane of EGLFrame is represented by ::cudaEglPlaneDesc which is defined as:
|
| 242 |
+
* \code
|
| 243 |
+
* typedef struct cudaEglPlaneDesc_st {
|
| 244 |
+
* unsigned int width;
|
| 245 |
+
* unsigned int height;
|
| 246 |
+
* unsigned int depth;
|
| 247 |
+
* unsigned int pitch;
|
| 248 |
+
* unsigned int numChannels;
|
| 249 |
+
* struct cudaChannelFormatDesc channelDesc;
|
| 250 |
+
* unsigned int reserved[4];
|
| 251 |
+
* } cudaEglPlaneDesc;
|
| 252 |
+
* \endcode
|
| 253 |
+
|
| 254 |
+
*/
|
| 255 |
+
typedef struct cudaEglFrame_st {
|
| 256 |
+
union {
|
| 257 |
+
cudaArray_t pArray[CUDA_EGL_MAX_PLANES]; /**< Array of CUDA arrays corresponding to each plane*/
|
| 258 |
+
struct cudaPitchedPtr pPitch[CUDA_EGL_MAX_PLANES]; /**< Array of Pointers corresponding to each plane*/
|
| 259 |
+
} frame;
|
| 260 |
+
cudaEglPlaneDesc planeDesc[CUDA_EGL_MAX_PLANES]; /**< CUDA EGL Plane Descriptor ::cudaEglPlaneDesc*/
|
| 261 |
+
unsigned int planeCount; /**< Number of planes */
|
| 262 |
+
cudaEglFrameType frameType; /**< Array or Pitch */
|
| 263 |
+
cudaEglColorFormat eglColorFormat; /**< CUDA EGL Color Format*/
|
| 264 |
+
} cudaEglFrame;
|
| 265 |
+
|
| 266 |
+
/**
|
| 267 |
+
* CUDA EGLStream Connection
|
| 268 |
+
*/
|
| 269 |
+
typedef struct CUeglStreamConnection_st *cudaEglStreamConnection;
|
| 270 |
+
|
| 271 |
+
/** @} */ /* END CUDART_TYPES */
|
| 272 |
+
|
| 273 |
+
/**
|
| 274 |
+
* \addtogroup CUDART_EGL EGL Interoperability
|
| 275 |
+
* This section describes the EGL interoperability functions of the CUDA
|
| 276 |
+
* runtime application programming interface.
|
| 277 |
+
*
|
| 278 |
+
* @{
|
| 279 |
+
*/
|
| 280 |
+
|
| 281 |
+
/**
|
| 282 |
+
* \brief Registers an EGL image
|
| 283 |
+
*
|
| 284 |
+
* Registers the EGLImageKHR specified by \p image for access by
|
| 285 |
+
* CUDA. A handle to the registered object is returned as \p pCudaResource.
|
| 286 |
+
* Additional Mapping/Unmapping is not required for the registered resource and
|
| 287 |
+
* ::cudaGraphicsResourceGetMappedEglFrame can be directly called on the \p pCudaResource.
|
| 288 |
+
*
|
| 289 |
+
* The application will be responsible for synchronizing access to shared objects.
|
| 290 |
+
* The application must ensure that any pending operations which access the objects have completed
|
| 291 |
+
* before passing control to CUDA. This may be accomplished by issuing and waiting for
|
| 292 |
+
* glFinish command on all GLcontexts (for OpenGL and likewise for other APIs).
|
| 293 |
+
* The application will also be responsible for ensuring that any pending operation on the
|
| 294 |
+
* registered CUDA resource has completed prior to executing subsequent commands in other APIs
|
| 295 |
+
* accessing the same memory objects.
|
| 296 |
+
* This can be accomplished by calling cuCtxSynchronize or cuEventSynchronize (preferably).
|
| 297 |
+
*
|
| 298 |
+
* The surface's intended usage is specified using \p flags, as follows:
|
| 299 |
+
*
|
| 300 |
+
* - ::cudaGraphicsRegisterFlagsNone: Specifies no hints about how this
|
| 301 |
+
* resource will be used. It is therefore assumed that this resource will be
|
| 302 |
+
* read from and written to by CUDA. This is the default value.
|
| 303 |
+
* - ::cudaGraphicsRegisterFlagsReadOnly: Specifies that CUDA
|
| 304 |
+
* will not write to this resource.
|
| 305 |
+
* - ::cudaGraphicsRegisterFlagsWriteDiscard: Specifies that
|
| 306 |
+
* CUDA will not read from this resource and will write over the
|
| 307 |
+
* entire contents of the resource, so none of the data previously
|
| 308 |
+
* stored in the resource will be preserved.
|
| 309 |
+
*
|
| 310 |
+
* The EGLImageKHR is an object which can be used to create EGLImage target resource. It is defined as a void pointer.
|
| 311 |
+
* typedef void* EGLImageKHR
|
| 312 |
+
*
|
| 313 |
+
* \param pCudaResource - Pointer to the returned object handle
|
| 314 |
+
* \param image - An EGLImageKHR image which can be used to create target resource.
|
| 315 |
+
* \param flags - Map flags
|
| 316 |
+
*
|
| 317 |
+
* \return
|
| 318 |
+
* ::cudaSuccess,
|
| 319 |
+
* ::cudaErrorInvalidResourceHandle,
|
| 320 |
+
* ::cudaErrorInvalidValue,
|
| 321 |
+
* ::cudaErrorUnknown
|
| 322 |
+
*
|
| 323 |
+
* \sa
|
| 324 |
+
* ::cudaGraphicsUnregisterResource,
|
| 325 |
+
* ::cudaGraphicsResourceGetMappedEglFrame,
|
| 326 |
+
* ::cuGraphicsEGLRegisterImage
|
| 327 |
+
*/
|
| 328 |
+
extern __host__ cudaError_t CUDARTAPI cudaGraphicsEGLRegisterImage(struct cudaGraphicsResource **pCudaResource, EGLImageKHR image, unsigned int flags);
|
| 329 |
+
|
| 330 |
+
/**
|
| 331 |
+
* \brief Connect CUDA to EGLStream as a consumer.
|
| 332 |
+
*
|
| 333 |
+
* Connect CUDA as a consumer to EGLStreamKHR specified by \p eglStream.
|
| 334 |
+
*
|
| 335 |
+
* The EGLStreamKHR is an EGL object that transfers a sequence of image frames from one
|
| 336 |
+
* API to another.
|
| 337 |
+
*
|
| 338 |
+
* \param conn - Pointer to the returned connection handle
|
| 339 |
+
* \param eglStream - EGLStreamKHR handle
|
| 340 |
+
*
|
| 341 |
+
* \return
|
| 342 |
+
* ::cudaSuccess,
|
| 343 |
+
* ::cudaErrorInvalidValue,
|
| 344 |
+
* ::cudaErrorUnknown
|
| 345 |
+
*
|
| 346 |
+
* \sa
|
| 347 |
+
* ::cudaEGLStreamConsumerDisconnect,
|
| 348 |
+
* ::cudaEGLStreamConsumerAcquireFrame,
|
| 349 |
+
* ::cudaEGLStreamConsumerReleaseFrame,
|
| 350 |
+
* ::cuEGLStreamConsumerConnect
|
| 351 |
+
*/
|
| 352 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamConsumerConnect(cudaEglStreamConnection *conn, EGLStreamKHR eglStream);
|
| 353 |
+
|
| 354 |
+
/**
|
| 355 |
+
* \brief Connect CUDA to EGLStream as a consumer with given flags.
|
| 356 |
+
*
|
| 357 |
+
* Connect CUDA as a consumer to EGLStreamKHR specified by \p eglStream with specified \p flags defined by
|
| 358 |
+
* ::cudaEglResourceLocationFlags.
|
| 359 |
+
*
|
| 360 |
+
* The flags specify whether the consumer wants to access frames from system memory or video memory.
|
| 361 |
+
* Default is ::cudaEglResourceLocationVidmem.
|
| 362 |
+
*
|
| 363 |
+
* \param conn - Pointer to the returned connection handle
|
| 364 |
+
* \param eglStream - EGLStreamKHR handle
|
| 365 |
+
* \param flags - Flags denote intended location - system or video.
|
| 366 |
+
*
|
| 367 |
+
* \return
|
| 368 |
+
* ::cudaSuccess,
|
| 369 |
+
* ::cudaErrorInvalidValue,
|
| 370 |
+
* ::cudaErrorUnknown
|
| 371 |
+
*
|
| 372 |
+
* \sa
|
| 373 |
+
* ::cudaEGLStreamConsumerDisconnect,
|
| 374 |
+
* ::cudaEGLStreamConsumerAcquireFrame,
|
| 375 |
+
* ::cudaEGLStreamConsumerReleaseFrame,
|
| 376 |
+
* ::cuEGLStreamConsumerConnectWithFlags
|
| 377 |
+
*/
|
| 378 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamConsumerConnectWithFlags(cudaEglStreamConnection *conn, EGLStreamKHR eglStream, unsigned int flags);
|
| 379 |
+
|
| 380 |
+
/**
|
| 381 |
+
* \brief Disconnect CUDA as a consumer to EGLStream.
|
| 382 |
+
*
|
| 383 |
+
* Disconnect CUDA as a consumer to EGLStreamKHR.
|
| 384 |
+
*
|
| 385 |
+
* \param conn - Connection to disconnect.
|
| 386 |
+
*
|
| 387 |
+
* \return
|
| 388 |
+
* ::cudaSuccess,
|
| 389 |
+
* ::cudaErrorInvalidValue,
|
| 390 |
+
* ::cudaErrorUnknown
|
| 391 |
+
*
|
| 392 |
+
* \sa
|
| 393 |
+
* ::cudaEGLStreamConsumerConnect,
|
| 394 |
+
* ::cudaEGLStreamConsumerAcquireFrame,
|
| 395 |
+
* ::cudaEGLStreamConsumerReleaseFrame,
|
| 396 |
+
* ::cuEGLStreamConsumerDisconnect
|
| 397 |
+
*/
|
| 398 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamConsumerDisconnect(cudaEglStreamConnection *conn);
|
| 399 |
+
|
| 400 |
+
/**
|
| 401 |
+
* \brief Acquire an image frame from the EGLStream with CUDA as a consumer.
|
| 402 |
+
*
|
| 403 |
+
* Acquire an image frame from EGLStreamKHR.
|
| 404 |
+
* ::cudaGraphicsResourceGetMappedEglFrame can be called on \p pCudaResource to get
|
| 405 |
+
* ::cudaEglFrame.
|
| 406 |
+
*
|
| 407 |
+
* \param conn - Connection on which to acquire
|
| 408 |
+
* \param pCudaResource - CUDA resource on which the EGLStream frame will be mapped for use.
|
| 409 |
+
* \param pStream - CUDA stream for synchronization and any data migrations
|
| 410 |
+
* implied by ::cudaEglResourceLocationFlags.
|
| 411 |
+
* \param timeout - Desired timeout in usec.
|
| 412 |
+
*
|
| 413 |
+
* \return
|
| 414 |
+
* ::cudaSuccess,
|
| 415 |
+
* ::cudaErrorInvalidValue,
|
| 416 |
+
* ::cudaErrorUnknown,
|
| 417 |
+
* ::cudaErrorLaunchTimeout
|
| 418 |
+
*
|
| 419 |
+
* \sa
|
| 420 |
+
* ::cudaEGLStreamConsumerConnect,
|
| 421 |
+
* ::cudaEGLStreamConsumerDisconnect,
|
| 422 |
+
* ::cudaEGLStreamConsumerReleaseFrame,
|
| 423 |
+
* ::cuEGLStreamConsumerAcquireFrame
|
| 424 |
+
*/
|
| 425 |
+
|
| 426 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamConsumerAcquireFrame(cudaEglStreamConnection *conn,
|
| 427 |
+
cudaGraphicsResource_t *pCudaResource, cudaStream_t *pStream, unsigned int timeout);
|
| 428 |
+
/**
|
| 429 |
+
* \brief Releases the last frame acquired from the EGLStream.
|
| 430 |
+
*
|
| 431 |
+
* Release the acquired image frame specified by \p pCudaResource to EGLStreamKHR.
|
| 432 |
+
*
|
| 433 |
+
* \param conn - Connection on which to release
|
| 434 |
+
* \param pCudaResource - CUDA resource whose corresponding frame is to be released
|
| 435 |
+
* \param pStream - CUDA stream on which release will be done.
|
| 436 |
+
*
|
| 437 |
+
* \return
|
| 438 |
+
* ::cudaSuccess,
|
| 439 |
+
* ::cudaErrorInvalidValue,
|
| 440 |
+
* ::cudaErrorUnknown
|
| 441 |
+
*
|
| 442 |
+
* \sa
|
| 443 |
+
* ::cudaEGLStreamConsumerConnect,
|
| 444 |
+
* ::cudaEGLStreamConsumerDisconnect,
|
| 445 |
+
* ::cudaEGLStreamConsumerAcquireFrame,
|
| 446 |
+
* ::cuEGLStreamConsumerReleaseFrame
|
| 447 |
+
*/
|
| 448 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamConsumerReleaseFrame(cudaEglStreamConnection *conn,
|
| 449 |
+
cudaGraphicsResource_t pCudaResource, cudaStream_t *pStream);
|
| 450 |
+
|
| 451 |
+
/**
|
| 452 |
+
* \brief Connect CUDA to EGLStream as a producer.
|
| 453 |
+
*
|
| 454 |
+
* Connect CUDA as a producer to EGLStreamKHR specified by \p eglStream.
|
| 455 |
+
*
|
| 456 |
+
* The EGLStreamKHR is an EGL object that transfers a sequence of image frames from one
|
| 457 |
+
* API to another.
|
| 458 |
+
*
|
| 459 |
+
* \param conn - Pointer to the returned connection handle
|
| 460 |
+
* \param eglStream - EGLStreamKHR handle
|
| 461 |
+
* \param width - width of the image to be submitted to the stream
|
| 462 |
+
* \param height - height of the image to be submitted to the stream
|
| 463 |
+
*
|
| 464 |
+
* \return
|
| 465 |
+
* ::cudaSuccess,
|
| 466 |
+
* ::cudaErrorInvalidValue,
|
| 467 |
+
* ::cudaErrorUnknown
|
| 468 |
+
*
|
| 469 |
+
* \sa
|
| 470 |
+
* ::cudaEGLStreamProducerDisconnect,
|
| 471 |
+
* ::cudaEGLStreamProducerPresentFrame,
|
| 472 |
+
* ::cudaEGLStreamProducerReturnFrame,
|
| 473 |
+
* ::cuEGLStreamProducerConnect
|
| 474 |
+
*/
|
| 475 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamProducerConnect(cudaEglStreamConnection *conn,
|
| 476 |
+
EGLStreamKHR eglStream, EGLint width, EGLint height);
|
| 477 |
+
|
| 478 |
+
/**
|
| 479 |
+
* \brief Disconnect CUDA as a producer to EGLStream.
|
| 480 |
+
*
|
| 481 |
+
* Disconnect CUDA as a producer to EGLStreamKHR.
|
| 482 |
+
*
|
| 483 |
+
* \param conn - Connection to disconnect.
|
| 484 |
+
*
|
| 485 |
+
* \return
|
| 486 |
+
* ::cudaSuccess,
|
| 487 |
+
* ::cudaErrorInvalidValue,
|
| 488 |
+
* ::cudaErrorUnknown
|
| 489 |
+
*
|
| 490 |
+
* \sa
|
| 491 |
+
* ::cudaEGLStreamProducerConnect,
|
| 492 |
+
* ::cudaEGLStreamProducerPresentFrame,
|
| 493 |
+
* ::cudaEGLStreamProducerReturnFrame,
|
| 494 |
+
* ::cuEGLStreamProducerDisconnect
|
| 495 |
+
*/
|
| 496 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamProducerDisconnect(cudaEglStreamConnection *conn);
|
| 497 |
+
|
| 498 |
+
/**
|
| 499 |
+
* \brief Present a CUDA eglFrame to the EGLStream with CUDA as a producer.
|
| 500 |
+
*
|
| 501 |
+
* The ::cudaEglFrame is defined as:
|
| 502 |
+
* \code
|
| 503 |
+
* typedef struct cudaEglFrame_st {
|
| 504 |
+
* union {
|
| 505 |
+
* cudaArray_t pArray[CUDA_EGL_MAX_PLANES];
|
| 506 |
+
* struct cudaPitchedPtr pPitch[CUDA_EGL_MAX_PLANES];
|
| 507 |
+
* } frame;
|
| 508 |
+
* cudaEglPlaneDesc planeDesc[CUDA_EGL_MAX_PLANES];
|
| 509 |
+
* unsigned int planeCount;
|
| 510 |
+
* cudaEglFrameType frameType;
|
| 511 |
+
* cudaEglColorFormat eglColorFormat;
|
| 512 |
+
* } cudaEglFrame;
|
| 513 |
+
* \endcode
|
| 514 |
+
*
|
| 515 |
+
* For ::cudaEglFrame of type ::cudaEglFrameTypePitch, the application may present sub-region of a memory
|
| 516 |
+
* allocation. In that case, ::cudaPitchedPtr::ptr will specify the start address of the sub-region in
|
| 517 |
+
* the allocation and ::cudaEglPlaneDesc will specify the dimensions of the sub-region.
|
| 518 |
+
*
|
| 519 |
+
* \param conn - Connection on which to present the CUDA array
|
| 520 |
+
* \param eglframe - CUDA EGLStream Producer Frame handle to be sent to the consumer over EGLStream.
|
| 521 |
+
* \param pStream - CUDA stream on which to present the frame.
|
| 522 |
+
*
|
| 523 |
+
* \return
|
| 524 |
+
* ::cudaSuccess,
|
| 525 |
+
* ::cudaErrorInvalidValue,
|
| 526 |
+
* ::cudaErrorUnknown
|
| 527 |
+
*
|
| 528 |
+
* \sa
|
| 529 |
+
* ::cudaEGLStreamProducerConnect,
|
| 530 |
+
* ::cudaEGLStreamProducerDisconnect,
|
| 531 |
+
* ::cudaEGLStreamProducerReturnFrame,
|
| 532 |
+
* ::cuEGLStreamProducerPresentFrame
|
| 533 |
+
*/
|
| 534 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection *conn,
|
| 535 |
+
cudaEglFrame eglframe, cudaStream_t *pStream);
|
| 536 |
+
|
| 537 |
+
/**
|
| 538 |
+
* \brief Return the CUDA eglFrame to the EGLStream last released by the consumer.
|
| 539 |
+
*
|
| 540 |
+
* This API can potentially return cudaErrorLaunchTimeout if the consumer has not
|
| 541 |
+
* returned a frame to EGL stream. If timeout is returned the application can retry.
|
| 542 |
+
*
|
| 543 |
+
* \param conn - Connection on which to return the frame
|
| 544 |
+
* \param eglframe - CUDA EGLStream Producer Frame handle returned from the consumer over EGLStream.
|
| 545 |
+
* \param pStream - CUDA stream on which to return the frame.
|
| 546 |
+
*
|
| 547 |
+
* \return
|
| 548 |
+
* ::cudaSuccess,
|
| 549 |
+
* ::cudaErrorLaunchTimeout,
|
| 550 |
+
* ::cudaErrorInvalidValue,
|
| 551 |
+
* ::cudaErrorUnknown
|
| 552 |
+
*
|
| 553 |
+
* \sa
|
| 554 |
+
* ::cudaEGLStreamProducerConnect,
|
| 555 |
+
* ::cudaEGLStreamProducerDisconnect,
|
| 556 |
+
* ::cudaEGLStreamProducerPresentFrame,
|
| 557 |
+
* ::cuEGLStreamProducerReturnFrame
|
| 558 |
+
*/
|
| 559 |
+
extern __host__ cudaError_t CUDARTAPI cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection *conn,
|
| 560 |
+
cudaEglFrame *eglframe, cudaStream_t *pStream);
|
| 561 |
+
|
| 562 |
+
/**
|
| 563 |
+
* \brief Get an eglFrame through which to access a registered EGL graphics resource.
|
| 564 |
+
*
|
| 565 |
+
* Returns in \p *eglFrame an eglFrame pointer through which the registered graphics resource
|
| 566 |
+
* \p resource may be accessed.
|
| 567 |
+
* This API can only be called for EGL graphics resources.
|
| 568 |
+
*
|
| 569 |
+
* The ::cudaEglFrame is defined as
|
| 570 |
+
* \code
|
| 571 |
+
* typedef struct cudaEglFrame_st {
|
| 572 |
+
* union {
|
| 573 |
+
* cudaArray_t pArray[CUDA_EGL_MAX_PLANES];
|
| 574 |
+
* struct cudaPitchedPtr pPitch[CUDA_EGL_MAX_PLANES];
|
| 575 |
+
* } frame;
|
| 576 |
+
* cudaEglPlaneDesc planeDesc[CUDA_EGL_MAX_PLANES];
|
| 577 |
+
* unsigned int planeCount;
|
| 578 |
+
* cudaEglFrameType frameType;
|
| 579 |
+
* cudaEglColorFormat eglColorFormat;
|
| 580 |
+
* } cudaEglFrame;
|
| 581 |
+
* \endcode
|
| 582 |
+
*
|
| 583 |
+
*
|
| 584 |
+
* \param eglFrame - Returned eglFrame.
|
| 585 |
+
* \param resource - Registered resource to access.
|
| 586 |
+
* \param index - Index for cubemap surfaces.
|
| 587 |
+
* \param mipLevel - Mipmap level for the subresource to access.
|
| 588 |
+
*
|
| 589 |
+
* \return
|
| 590 |
+
* ::cudaSuccess,
|
| 591 |
+
* ::cudaErrorInvalidValue,
|
| 592 |
+
* ::cudaErrorUnknown
|
| 593 |
+
*
|
| 594 |
+
* \note Note that in case of multiplanar \p *eglFrame, pitch of only first plane (unsigned int cudaEglPlaneDesc::pitch) is to be considered by the application.
|
| 595 |
+
*
|
| 596 |
+
* \sa
|
| 597 |
+
* ::cudaGraphicsSubResourceGetMappedArray,
|
| 598 |
+
* ::cudaGraphicsResourceGetMappedPointer,
|
| 599 |
+
* ::cuGraphicsResourceGetMappedEglFrame
|
| 600 |
+
*/
|
| 601 |
+
extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame,
|
| 602 |
+
cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel);
|
| 603 |
+
|
| 604 |
+
/**
|
| 605 |
+
* \brief Creates an event from EGLSync object
|
| 606 |
+
*
|
| 607 |
+
* Creates an event *phEvent from an EGLSyncKHR eglSync with the flags specified
|
| 608 |
+
* via \p flags. Valid flags include:
|
| 609 |
+
* - ::cudaEventDefault: Default event creation flag.
|
| 610 |
+
* - ::cudaEventBlockingSync: Specifies that the created event should use blocking
|
| 611 |
+
* synchronization. A CPU thread that uses ::cudaEventSynchronize() to wait on
|
| 612 |
+
* an event created with this flag will block until the event has actually
|
| 613 |
+
* been completed.
|
| 614 |
+
*
|
| 615 |
+
* ::cudaEventRecord and TimingData are not supported for events created from EGLSync.
|
| 616 |
+
*
|
| 617 |
+
* The EGLSyncKHR is an opaque handle to an EGL sync object.
|
| 618 |
+
* typedef void* EGLSyncKHR
|
| 619 |
+
*
|
| 620 |
+
* \param phEvent - Returns newly created event
|
| 621 |
+
* \param eglSync - Opaque handle to EGLSync object
|
| 622 |
+
* \param flags - Event creation flags
|
| 623 |
+
*
|
| 624 |
+
* \return
|
| 625 |
+
* ::cudaSuccess,
|
| 626 |
+
* ::cudaErrorInitializationError,
|
| 627 |
+
* ::cudaErrorInvalidValue,
|
| 628 |
+
* ::cudaErrorLaunchFailure,
|
| 629 |
+
* ::cudaErrorMemoryAllocation
|
| 630 |
+
*
|
| 631 |
+
* \sa
|
| 632 |
+
* ::cudaEventQuery,
|
| 633 |
+
* ::cudaEventSynchronize,
|
| 634 |
+
* ::cudaEventDestroy
|
| 635 |
+
*/
|
| 636 |
+
extern __host__ cudaError_t CUDARTAPI cudaEventCreateFromEGLSync(cudaEvent_t *phEvent, EGLSyncKHR eglSync, unsigned int flags);
|
| 637 |
+
|
| 638 |
+
/** @} */ /* END CUDART_EGL */
|
| 639 |
+
|
| 640 |
+
#if defined(__cplusplus)
|
| 641 |
+
}
|
| 642 |
+
#endif /* __cplusplus */
|
| 643 |
+
|
| 644 |
+
#endif /* __CUDA_EGL_INTEROP_H__ */
|
| 645 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_fp16.h
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_fp8.h
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2022-2024 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#ifndef __CUDA_FP8_H__
|
| 51 |
+
#define __CUDA_FP8_H__
|
| 52 |
+
|
| 53 |
+
/* Set up function decorations */
|
| 54 |
+
#if defined(__CUDACC__)
|
| 55 |
+
#define __CUDA_FP8_DECL__ static __device__ __inline__
|
| 56 |
+
#define __CUDA_HOSTDEVICE_FP8__ __host__ __device__
|
| 57 |
+
#define __CUDA_HOSTDEVICE_FP8_DECL__ static __host__ __device__ __inline__
|
| 58 |
+
#else /* !defined(__CUDACC__) */
|
| 59 |
+
#if defined(__GNUC__)
|
| 60 |
+
#define __CUDA_HOSTDEVICE_FP8_DECL__ static __attribute__((unused))
|
| 61 |
+
#else
|
| 62 |
+
#define __CUDA_HOSTDEVICE_FP8_DECL__ static
|
| 63 |
+
#endif /* defined(__GNUC__) */
|
| 64 |
+
#define __CUDA_HOSTDEVICE_FP8__
|
| 65 |
+
#endif /* defined(__CUDACC__) */
|
| 66 |
+
|
| 67 |
+
#if !defined(_MSC_VER) && __cplusplus >= 201103L
|
| 68 |
+
#define __CPP_VERSION_AT_LEAST_11_FP8
|
| 69 |
+
#elif _MSC_FULL_VER >= 190024210 && _MSVC_LANG >= 201103L
|
| 70 |
+
#define __CPP_VERSION_AT_LEAST_11_FP8
|
| 71 |
+
#endif
|
| 72 |
+
|
| 73 |
+
// implicitly provided by NVRTC
|
| 74 |
+
#if !defined(__CUDACC_RTC__)
|
| 75 |
+
/* bring in enum cudaRoundMode */
|
| 76 |
+
#include "device_types.h"
|
| 77 |
+
#endif /* !defined(__CUDACC_RTC__) */
|
| 78 |
+
|
| 79 |
+
/* bring in __half_raw data type */
|
| 80 |
+
#include "cuda_fp16.h"
|
| 81 |
+
/* bring in __nv_bfloat16_raw data type */
|
| 82 |
+
#include "cuda_bf16.h"
|
| 83 |
+
|
| 84 |
+
// implicitly provided by NVRTC
|
| 85 |
+
#if !defined(__CUDACC_RTC__)
|
| 86 |
+
/* bring in float2, double4, etc vector types */
|
| 87 |
+
#include "vector_types.h"
|
| 88 |
+
#endif /* !defined(__CUDACC_RTC__) */
|
| 89 |
+
|
| 90 |
+
/**
|
| 91 |
+
* \defgroup CUDA_MATH_INTRINSIC_FP8 FP8 Intrinsics
|
| 92 |
+
* This section describes fp8 intrinsic functions.
|
| 93 |
+
* To use these functions, include the header file \p cuda_fp8.h in your
|
| 94 |
+
* program.
|
| 95 |
+
* The following macros are available to help users selectively enable/disable
|
| 96 |
+
* various definitions present in the header file:
|
| 97 |
+
* - \p __CUDA_NO_FP8_CONVERSIONS__ - If defined, this macro will prevent any
|
| 98 |
+
* use of the C++ type conversions (converting constructors and conversion
|
| 99 |
+
* operators) defined in the header.
|
| 100 |
+
* - \p __CUDA_NO_FP8_CONVERSION_OPERATORS__ - If defined, this macro will
|
| 101 |
+
* prevent any use of the C++ conversion operators from \p fp8 to other types.
|
| 102 |
+
*/
|
| 103 |
+
|
| 104 |
+
/**
|
| 105 |
+
* \defgroup CUDA_MATH_FP8_MISC FP8 Conversion and Data Movement
|
| 106 |
+
* \ingroup CUDA_MATH_INTRINSIC_FP8
|
| 107 |
+
* To use these functions, include the header file \p cuda_fp8.h in your
|
| 108 |
+
* program.
|
| 109 |
+
*/
|
| 110 |
+
|
| 111 |
+
/**
|
| 112 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 113 |
+
* \brief 8-bit \p unsigned \p integer
|
| 114 |
+
* type abstraction used for \p fp8 floating-point
|
| 115 |
+
* numbers storage.
|
| 116 |
+
*/
|
| 117 |
+
typedef unsigned char __nv_fp8_storage_t;
|
| 118 |
+
|
| 119 |
+
/**
|
| 120 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 121 |
+
* \brief 16-bit \p unsigned \p integer
|
| 122 |
+
* type abstraction used for storage of pairs of
|
| 123 |
+
* \p fp8 floating-point numbers.
|
| 124 |
+
*/
|
| 125 |
+
typedef unsigned short int __nv_fp8x2_storage_t;
|
| 126 |
+
|
| 127 |
+
/**
|
| 128 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 129 |
+
* \brief 32-bit \p unsigned \p integer
|
| 130 |
+
* type abstraction used for storage of tetrads of
|
| 131 |
+
* \p fp8 floating-point numbers.
|
| 132 |
+
*/
|
| 133 |
+
typedef unsigned int __nv_fp8x4_storage_t;
|
| 134 |
+
|
| 135 |
+
/**
|
| 136 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 137 |
+
* \brief Enumerates the modes applicable when
|
| 138 |
+
* performing a narrowing conversion to \p fp8 destination types.
|
| 139 |
+
*/
|
| 140 |
+
typedef enum __nv_saturation_t {
|
| 141 |
+
/**
|
| 142 |
+
* Means no saturation to finite is performed when conversion
|
| 143 |
+
* results in rounding values outside the range of destination
|
| 144 |
+
* type.
|
| 145 |
+
* NOTE: for fp8 type of e4m3 kind, the results that are larger
|
| 146 |
+
* than the maximum representable finite number of the target
|
| 147 |
+
* format become NaN.
|
| 148 |
+
*/
|
| 149 |
+
__NV_NOSAT,
|
| 150 |
+
/**
|
| 151 |
+
* Means inputs larger in magnitude than the maximum representable
|
| 152 |
+
* finite number MAXNORM of the target format round to the
|
| 153 |
+
* MAXNORM of the same sign as input.
|
| 154 |
+
*/
|
| 155 |
+
__NV_SATFINITE,
|
| 156 |
+
} __nv_saturation_t;
|
| 157 |
+
|
| 158 |
+
/**
|
| 159 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 160 |
+
* \brief Enumerates the possible
|
| 161 |
+
* interpretations of the 8-bit values when referring to them as
|
| 162 |
+
* \p fp8 types.
|
| 163 |
+
*/
|
| 164 |
+
typedef enum __nv_fp8_interpretation_t {
|
| 165 |
+
__NV_E4M3, /**< Stands for \p fp8 numbers of \p e4m3 kind. */
|
| 166 |
+
__NV_E5M2, /**< Stands for \p fp8 numbers of \p e5m2 kind. */
|
| 167 |
+
} __nv_fp8_interpretation_t;
|
| 168 |
+
|
| 169 |
+
/* Forward-declaration of C-style APIs */
|
| 170 |
+
|
| 171 |
+
/**
|
| 172 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 173 |
+
* \brief Converts input \p double precision \p x to \p fp8 type of the
|
| 174 |
+
* requested kind using round-to-nearest-even rounding and requested saturation
|
| 175 |
+
* mode.
|
| 176 |
+
*
|
| 177 |
+
* \details Converts input \p x to \p fp8 type of the kind specified by
|
| 178 |
+
* \p fp8_interpretation parameter,
|
| 179 |
+
* using round-to-nearest-even rounding and
|
| 180 |
+
* saturation mode specified by \p saturate parameter.
|
| 181 |
+
*
|
| 182 |
+
* \returns
|
| 183 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 184 |
+
*/
|
| 185 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t
|
| 186 |
+
__nv_cvt_double_to_fp8(const double x, const __nv_saturation_t saturate,
|
| 187 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 188 |
+
|
| 189 |
+
/**
|
| 190 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 191 |
+
* \brief Converts input vector of two \p double precision numbers packed
|
| 192 |
+
* in \p double2 \p x into a vector of two values of \p fp8 type of
|
| 193 |
+
* the requested kind using round-to-nearest-even rounding and requested
|
| 194 |
+
* saturation mode.
|
| 195 |
+
*
|
| 196 |
+
* \details Converts input vector \p x to a vector of two \p fp8 values of the
|
| 197 |
+
* kind specified by \p fp8_interpretation parameter, using
|
| 198 |
+
* round-to-nearest-even rounding and saturation mode specified by \p saturate
|
| 199 |
+
* parameter.
|
| 200 |
+
*
|
| 201 |
+
* \returns
|
| 202 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 203 |
+
*/
|
| 204 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t
|
| 205 |
+
__nv_cvt_double2_to_fp8x2(const double2 x, const __nv_saturation_t saturate,
|
| 206 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 207 |
+
|
| 208 |
+
/**
|
| 209 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 210 |
+
* \brief Converts input \p single precision \p x to \p fp8 type of the
|
| 211 |
+
* requested kind using round-to-nearest-even rounding and requested saturation
|
| 212 |
+
* mode.
|
| 213 |
+
*
|
| 214 |
+
* \details Converts input \p x to \p fp8 type of the kind specified by
|
| 215 |
+
* \p fp8_interpretation parameter,
|
| 216 |
+
* using round-to-nearest-even rounding and
|
| 217 |
+
* saturation mode specified by \p saturate parameter.
|
| 218 |
+
*
|
| 219 |
+
* \returns
|
| 220 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 221 |
+
*/
|
| 222 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t
|
| 223 |
+
__nv_cvt_float_to_fp8(const float x, const __nv_saturation_t saturate,
|
| 224 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 225 |
+
|
| 226 |
+
/**
|
| 227 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 228 |
+
* \brief Converts input vector of two \p single precision numbers packed
|
| 229 |
+
* in \p float2 \p x into a vector of two values of \p fp8 type of
|
| 230 |
+
* the requested kind using round-to-nearest-even rounding and requested
|
| 231 |
+
* saturation mode.
|
| 232 |
+
*
|
| 233 |
+
* \details Converts input vector \p x to a vector of two \p fp8 values of the
|
| 234 |
+
* kind specified by \p fp8_interpretation parameter, using
|
| 235 |
+
* round-to-nearest-even rounding and saturation mode specified by \p saturate
|
| 236 |
+
* parameter.
|
| 237 |
+
*
|
| 238 |
+
* \returns
|
| 239 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 240 |
+
*/
|
| 241 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t
|
| 242 |
+
__nv_cvt_float2_to_fp8x2(const float2 x, const __nv_saturation_t saturate,
|
| 243 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 244 |
+
|
| 245 |
+
/**
|
| 246 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 247 |
+
* \brief Converts input \p half precision \p x to \p fp8 type of the requested
|
| 248 |
+
* kind using round-to-nearest-even rounding and requested saturation mode.
|
| 249 |
+
*
|
| 250 |
+
* \details Converts input \p x to \p fp8 type of the kind specified by
|
| 251 |
+
* \p fp8_interpretation parameter,
|
| 252 |
+
* using round-to-nearest-even rounding and
|
| 253 |
+
* saturation mode specified by \p saturate parameter.
|
| 254 |
+
*
|
| 255 |
+
* \returns
|
| 256 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 257 |
+
*/
|
| 258 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t
|
| 259 |
+
__nv_cvt_halfraw_to_fp8(const __half_raw x, const __nv_saturation_t saturate,
|
| 260 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 261 |
+
|
| 262 |
+
/**
|
| 263 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 264 |
+
* \brief Converts input vector of two \p half precision numbers packed
|
| 265 |
+
* in \p __half2_raw \p x into a vector of two values of \p fp8 type of
|
| 266 |
+
* the requested kind using round-to-nearest-even rounding and requested
|
| 267 |
+
* saturation mode.
|
| 268 |
+
*
|
| 269 |
+
* \details Converts input vector \p x to a vector of two \p fp8 values of the
|
| 270 |
+
* kind specified by \p fp8_interpretation parameter, using
|
| 271 |
+
* round-to-nearest-even rounding and saturation mode specified by \p saturate
|
| 272 |
+
* parameter.
|
| 273 |
+
*
|
| 274 |
+
* \returns
|
| 275 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 276 |
+
*/
|
| 277 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t __nv_cvt_halfraw2_to_fp8x2(
|
| 278 |
+
const __half2_raw x, const __nv_saturation_t saturate,
|
| 279 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 280 |
+
|
| 281 |
+
/**
|
| 282 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 283 |
+
* \brief Converts input \p nv_bfloat16 precision \p x to \p fp8 type of the
|
| 284 |
+
* requested kind using round-to-nearest-even rounding and requested saturation
|
| 285 |
+
* mode.
|
| 286 |
+
*
|
| 287 |
+
* \details Converts input \p x to \p fp8 type of the kind specified by
|
| 288 |
+
* \p fp8_interpretation parameter,
|
| 289 |
+
* using round-to-nearest-even rounding and
|
| 290 |
+
* saturation mode specified by \p saturate parameter.
|
| 291 |
+
*
|
| 292 |
+
* \returns
|
| 293 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 294 |
+
*/
|
| 295 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t __nv_cvt_bfloat16raw_to_fp8(
|
| 296 |
+
const __nv_bfloat16_raw x, const __nv_saturation_t saturate,
|
| 297 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 298 |
+
|
| 299 |
+
/**
|
| 300 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 301 |
+
* \brief Converts input vector of two \p nv_bfloat16 precision numbers packed
|
| 302 |
+
* in \p __nv_bfloat162_raw \p x into a vector of two values of \p fp8 type of
|
| 303 |
+
* the requested kind using round-to-nearest-even rounding and requested
|
| 304 |
+
* saturation mode.
|
| 305 |
+
*
|
| 306 |
+
* \details Converts input vector \p x to a vector of two \p fp8 values of the
|
| 307 |
+
* kind specified by \p fp8_interpretation parameter, using
|
| 308 |
+
* round-to-nearest-even rounding and saturation mode specified by \p saturate
|
| 309 |
+
* parameter.
|
| 310 |
+
*
|
| 311 |
+
* \returns
|
| 312 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 313 |
+
*/
|
| 314 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t
|
| 315 |
+
__nv_cvt_bfloat16raw2_to_fp8x2(
|
| 316 |
+
const __nv_bfloat162_raw x, const __nv_saturation_t saturate,
|
| 317 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 318 |
+
|
| 319 |
+
/**
|
| 320 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 321 |
+
* \brief Converts input \p fp8 \p x of the specified kind
|
| 322 |
+
* to \p half precision.
|
| 323 |
+
*
|
| 324 |
+
* \details Converts input \p x of \p fp8 type of the kind specified by
|
| 325 |
+
* \p fp8_interpretation parameter
|
| 326 |
+
* to \p half precision.
|
| 327 |
+
*
|
| 328 |
+
* \returns
|
| 329 |
+
* - The \p __half_raw value holds the result of conversion.
|
| 330 |
+
*/
|
| 331 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __half_raw
|
| 332 |
+
__nv_cvt_fp8_to_halfraw(const __nv_fp8_storage_t x,
|
| 333 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 334 |
+
/**
|
| 335 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 336 |
+
* \brief Converts input vector of two \p fp8 values of the specified kind
|
| 337 |
+
* to a vector of two \p half precision values packed in \p __half2_raw
|
| 338 |
+
* structure.
|
| 339 |
+
*
|
| 340 |
+
* \details Converts input vector \p x of \p fp8 type of the kind specified by
|
| 341 |
+
* \p fp8_interpretation parameter
|
| 342 |
+
* to a vector of two \p half precision values and returns as \p __half2_raw
|
| 343 |
+
* structure.
|
| 344 |
+
*
|
| 345 |
+
* \returns
|
| 346 |
+
* - The \p __half2_raw value holds the result of conversion.
|
| 347 |
+
*/
|
| 348 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __half2_raw
|
| 349 |
+
__nv_cvt_fp8x2_to_halfraw2(const __nv_fp8x2_storage_t x,
|
| 350 |
+
const __nv_fp8_interpretation_t fp8_interpretation);
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
/**
|
| 354 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 355 |
+
* \brief Converts input \p bfloat16 value into a scaling factor of \p e8m0 kind.
|
| 356 |
+
*
|
| 357 |
+
* \details Input number's absolute value is rounded to the closest power of two in the
|
| 358 |
+
* direction specified via \p rounding parameter. Rounded results that are
|
| 359 |
+
* smaller than the smallest representable target format number 2^-127 are then
|
| 360 |
+
* clipped to 2^-127. Results that are larger than the largest representable
|
| 361 |
+
* target format number 2^127 are either clipped to 2^127 if \p saturate is equal
|
| 362 |
+
* to \p __NV_SATFINITE, or convert to \p NaN otherwise. \p NaN inputs convert
|
| 363 |
+
* into \p NaN output, encoded as \p 0xFF in the target format.
|
| 364 |
+
*
|
| 365 |
+
* \returns
|
| 366 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 367 |
+
*/
|
| 368 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t __nv_cvt_bfloat16raw_to_e8m0(const __nv_bfloat16_raw x, const __nv_saturation_t saturate, const enum cudaRoundMode rounding);
|
| 369 |
+
|
| 370 |
+
/**
|
| 371 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 372 |
+
* \brief Converts a pair of \p bfloat16 values into a pair of scaling factors of \p e8m0 kind.
|
| 373 |
+
*
|
| 374 |
+
* \see __nv_cvt_bfloat16raw_to_e8m0() for details of conversion.
|
| 375 |
+
*
|
| 376 |
+
* \returns
|
| 377 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 378 |
+
*/
|
| 379 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t __nv_cvt_bfloat162raw_to_e8m0x2(const __nv_bfloat162_raw x, const __nv_saturation_t saturate, const enum cudaRoundMode rounding);
|
| 380 |
+
|
| 381 |
+
/**
|
| 382 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 383 |
+
* \brief Converts input \p float value into a scaling factor of \p e8m0 kind.
|
| 384 |
+
*
|
| 385 |
+
* \see __nv_cvt_bfloat16raw_to_e8m0() for details of conversion.
|
| 386 |
+
*
|
| 387 |
+
* \returns
|
| 388 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 389 |
+
*/
|
| 390 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t __nv_cvt_float_to_e8m0(const float x, const __nv_saturation_t saturate, const enum cudaRoundMode rounding);
|
| 391 |
+
|
| 392 |
+
/**
|
| 393 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 394 |
+
* \brief Converts a pair of \p float values into a pair of scaling factors of \p e8m0 kind.
|
| 395 |
+
*
|
| 396 |
+
* \see __nv_cvt_bfloat16raw_to_e8m0() for details of conversion.
|
| 397 |
+
*
|
| 398 |
+
* \returns
|
| 399 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 400 |
+
*/
|
| 401 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t __nv_cvt_float2_to_e8m0x2(const float2 x, const __nv_saturation_t saturate, const enum cudaRoundMode rounding);
|
| 402 |
+
|
| 403 |
+
/**
|
| 404 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 405 |
+
* \brief Converts input \p double value into a scaling factor of \p e8m0 kind.
|
| 406 |
+
*
|
| 407 |
+
* \see __nv_cvt_bfloat16raw_to_e8m0() for details of conversion.
|
| 408 |
+
*
|
| 409 |
+
* \returns
|
| 410 |
+
* - The \p __nv_fp8_storage_t value holds the result of conversion.
|
| 411 |
+
*/
|
| 412 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t __nv_cvt_double_to_e8m0(const double x, const __nv_saturation_t saturate, const enum cudaRoundMode rounding);
|
| 413 |
+
|
| 414 |
+
/**
|
| 415 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 416 |
+
* \brief Converts a pair of \p double values into a pair of scaling factors of \p e8m0 kind.
|
| 417 |
+
*
|
| 418 |
+
* \see __nv_cvt_bfloat16raw_to_e8m0() for details of conversion.
|
| 419 |
+
*
|
| 420 |
+
* \returns
|
| 421 |
+
* - The \p __nv_fp8x2_storage_t value holds the result of conversion.
|
| 422 |
+
*/
|
| 423 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t __nv_cvt_double2_to_e8m0x2(const double2 x, const __nv_saturation_t saturate, const enum cudaRoundMode rounding);
|
| 424 |
+
|
| 425 |
+
/**
|
| 426 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 427 |
+
* \brief Converts input scaling factor value of \p e8m0 kind into \p bfloat16.
|
| 428 |
+
*
|
| 429 |
+
* \details Input scales are exact powers of two or a \p NaN value,
|
| 430 |
+
* also representable in the target format.
|
| 431 |
+
*
|
| 432 |
+
* \returns
|
| 433 |
+
* - The \p __nv_bfloat16_raw value holds the result of conversion.
|
| 434 |
+
*/
|
| 435 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_bfloat16_raw __nv_cvt_e8m0_to_bf16raw(const __nv_fp8_storage_t x);
|
| 436 |
+
|
| 437 |
+
/**
|
| 438 |
+
* \ingroup CUDA_MATH_FP8_MISC
|
| 439 |
+
* \brief Converts input pair of scaling factors of \p e8m0 kind into a pair of \p bfloat16 values.
|
| 440 |
+
*
|
| 441 |
+
* \returns
|
| 442 |
+
* - The \p __nv_bfloat162_raw value holds the result of conversion.
|
| 443 |
+
*/
|
| 444 |
+
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_bfloat162_raw __nv_cvt_e8m0x2_to_bf162raw(const __nv_fp8x2_storage_t x);
|
| 445 |
+
|
| 446 |
+
#if defined(__cplusplus)
|
| 447 |
+
|
| 448 |
+
#define __CUDA_FP8_TYPES_EXIST__
|
| 449 |
+
|
| 450 |
+
/* Forward-declaration of structures defined in "cuda_fp8.hpp" */
|
| 451 |
+
struct __nv_fp8_e5m2;
|
| 452 |
+
struct __nv_fp8x2_e5m2;
|
| 453 |
+
struct __nv_fp8x4_e5m2;
|
| 454 |
+
|
| 455 |
+
struct __nv_fp8_e4m3;
|
| 456 |
+
struct __nv_fp8x2_e4m3;
|
| 457 |
+
struct __nv_fp8x4_e4m3;
|
| 458 |
+
|
| 459 |
+
struct __nv_fp8_e8m0;
|
| 460 |
+
struct __nv_fp8x2_e8m0;
|
| 461 |
+
struct __nv_fp8x4_e8m0;
|
| 462 |
+
|
| 463 |
+
#endif /* defined(__cplusplus) */
|
| 464 |
+
|
| 465 |
+
#include "cuda_fp8.hpp"
|
| 466 |
+
|
| 467 |
+
#undef __CUDA_FP8_DECL__
|
| 468 |
+
#undef __CUDA_HOSTDEVICE_FP8__
|
| 469 |
+
#undef __CUDA_HOSTDEVICE_FP8_DECL__
|
| 470 |
+
|
| 471 |
+
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
|
| 472 |
+
#undef __CPP_VERSION_AT_LEAST_11_FP8
|
| 473 |
+
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
|
| 474 |
+
|
| 475 |
+
#endif /* end of include guard: __CUDA_FP8_H__ */
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_occupancy.h
ADDED
|
@@ -0,0 +1,2094 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/**
|
| 51 |
+
* CUDA Occupancy Calculator
|
| 52 |
+
*
|
| 53 |
+
* NAME
|
| 54 |
+
*
|
| 55 |
+
* cudaOccMaxActiveBlocksPerMultiprocessor,
|
| 56 |
+
* cudaOccMaxPotentialOccupancyBlockSize,
|
| 57 |
+
* cudaOccMaxPotentialOccupancyBlockSizeVariableSMem,
|
| 58 |
+
* cudaOccAvailableDynamicSMemPerBlock
|
| 59 |
+
*
|
| 60 |
+
* DESCRIPTION
|
| 61 |
+
*
|
| 62 |
+
* The CUDA occupancy calculator provides a standalone, programmatic
|
| 63 |
+
* interface to compute the occupancy of a function on a device. It can also
|
| 64 |
+
* provide occupancy-oriented launch configuration suggestions.
|
| 65 |
+
*
|
| 66 |
+
* The function and device are defined by the user through
|
| 67 |
+
* cudaOccFuncAttributes, cudaOccDeviceProp, and cudaOccDeviceState
|
| 68 |
+
* structures. All APIs require all 3 of them.
|
| 69 |
+
*
|
| 70 |
+
* See the structure definition for more details about the device / function
|
| 71 |
+
* descriptors.
|
| 72 |
+
*
|
| 73 |
+
* See each API's prototype for API usage.
|
| 74 |
+
*
|
| 75 |
+
* COMPATIBILITY
|
| 76 |
+
*
|
| 77 |
+
* The occupancy calculator will be updated on each major CUDA toolkit
|
| 78 |
+
* release. It does not provide forward compatibility, i.e. new hardware
|
| 79 |
+
* released after this implementation's release will not be supported.
|
| 80 |
+
*
|
| 81 |
+
* NOTE
|
| 82 |
+
*
|
| 83 |
+
* If there is access to CUDA runtime, and the sole intent is to calculate
|
| 84 |
+
* occupancy related values on one of the accessible CUDA devices, using CUDA
|
| 85 |
+
* runtime's occupancy calculation APIs is recommended.
|
| 86 |
+
*
|
| 87 |
+
*/
|
| 88 |
+
|
| 89 |
+
#ifndef __cuda_occupancy_h__
|
| 90 |
+
#define __cuda_occupancy_h__
|
| 91 |
+
|
| 92 |
+
#include <stddef.h>
|
| 93 |
+
#include <limits.h>
|
| 94 |
+
#include <string.h>
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
// __OCC_INLINE will be undefined at the end of this header
|
| 98 |
+
//
|
| 99 |
+
#ifdef __CUDACC__
|
| 100 |
+
#define __OCC_INLINE inline __host__ __device__
|
| 101 |
+
#elif defined _MSC_VER
|
| 102 |
+
#define __OCC_INLINE __inline
|
| 103 |
+
#else // GNUCC assumed
|
| 104 |
+
#define __OCC_INLINE inline
|
| 105 |
+
#endif
|
| 106 |
+
|
| 107 |
+
/**
 * Error codes reported by the occupancy calculator APIs.
 */
enum cudaOccError_enum {
    // No error encountered
    CUDA_OCC_SUCCESS = 0,

    // An input parameter is invalid
    CUDA_OCC_ERROR_INVALID_INPUT = 1,

    // The requested device is invalid, or is not supported by the current
    // implementation
    CUDA_OCC_ERROR_UNKNOWN_DEVICE = 2
};

typedef enum cudaOccError_enum cudaOccError;
|
| 115 |
+
|
| 116 |
+
typedef struct cudaOccResult cudaOccResult;
|
| 117 |
+
typedef struct cudaOccDeviceProp cudaOccDeviceProp;
|
| 118 |
+
typedef struct cudaOccFuncAttributes cudaOccFuncAttributes;
|
| 119 |
+
typedef struct cudaOccDeviceState cudaOccDeviceState;
|
| 120 |
+
|
| 121 |
+
/**
|
| 122 |
+
* The CUDA occupancy calculator computes the occupancy of the function
|
| 123 |
+
* described by attributes with the given block size (blockSize), static device
|
| 124 |
+
* properties (properties), dynamic device states (states) and per-block dynamic
|
| 125 |
+
* shared memory allocation (dynamicSMemSize) in bytes, and output it through
|
| 126 |
+
* result along with other useful information. The occupancy is computed in
|
| 127 |
+
* terms of the maximum number of active blocks per multiprocessor. The user can
|
| 128 |
+
* then convert it to other metrics, such as number of active warps.
|
| 129 |
+
*
|
| 130 |
+
* RETURN VALUE
|
| 131 |
+
*
|
| 132 |
+
* The occupancy and related information is returned through result.
|
| 133 |
+
*
|
| 134 |
+
* If result->activeBlocksPerMultiprocessor is 0, then the given parameter
|
| 135 |
+
* combination cannot run on the device.
|
| 136 |
+
*
|
| 137 |
+
* ERRORS
|
| 138 |
+
*
|
| 139 |
+
* CUDA_OCC_ERROR_INVALID_INPUT input parameter is invalid.
|
| 140 |
+
* CUDA_OCC_ERROR_UNKNOWN_DEVICE requested device is not supported in
|
| 141 |
+
* current implementation or device is invalid
|
| 142 |
+
*/
|
| 143 |
+
static __OCC_INLINE
|
| 144 |
+
cudaOccError cudaOccMaxActiveBlocksPerMultiprocessor(
|
| 145 |
+
cudaOccResult *result, // out
|
| 146 |
+
const cudaOccDeviceProp *properties, // in
|
| 147 |
+
const cudaOccFuncAttributes *attributes, // in
|
| 148 |
+
const cudaOccDeviceState *state, // in
|
| 149 |
+
int blockSize, // in
|
| 150 |
+
size_t dynamicSmemSize); // in
|
| 151 |
+
|
| 152 |
+
/**
|
| 153 |
+
* The CUDA launch configurator C API suggests a grid / block size pair (in
|
| 154 |
+
* minGridSize and blockSize) that achieves the best potential occupancy
|
| 155 |
+
* (i.e. maximum number of active warps with the smallest number of blocks) for
|
| 156 |
+
* the given function described by attributes, on a device described by
|
| 157 |
+
* properties with settings in state.
|
| 158 |
+
*
|
| 159 |
+
* If per-block dynamic shared memory allocation is not needed, the user should
|
| 160 |
+
* leave both blockSizeToDynamicSMemSize and dynamicSMemSize as 0.
|
| 161 |
+
*
|
| 162 |
+
* If per-block dynamic shared memory allocation is needed, then if the dynamic
|
| 163 |
+
* shared memory size is constant regardless of block size, the size should be
|
| 164 |
+
* passed through dynamicSMemSize, and blockSizeToDynamicSMemSize should be
|
| 165 |
+
* NULL.
|
| 166 |
+
*
|
| 167 |
+
* Otherwise, if the per-block dynamic shared memory size varies with different
|
| 168 |
+
* block sizes, the user needs to provide a pointer to a unary function through
|
| 169 |
+
* blockSizeToDynamicSMemSize that computes the dynamic shared memory needed by
|
| 170 |
+
* a block of the function for any given block size. dynamicSMemSize is
|
| 171 |
+
* ignored. An example signature is:
|
| 172 |
+
*
|
| 173 |
+
* // Take block size, returns dynamic shared memory needed
|
| 174 |
+
* size_t blockToSmem(int blockSize);
|
| 175 |
+
*
|
| 176 |
+
* RETURN VALUE
|
| 177 |
+
*
|
| 178 |
+
* The suggested block size and the minimum number of blocks needed to achieve
|
| 179 |
+
* the maximum occupancy are returned through blockSize and minGridSize.
|
| 180 |
+
*
|
| 181 |
+
* If *blockSize is 0, then the given combination cannot run on the device.
|
| 182 |
+
*
|
| 183 |
+
* ERRORS
|
| 184 |
+
*
|
| 185 |
+
* CUDA_OCC_ERROR_INVALID_INPUT input parameter is invalid.
|
| 186 |
+
* CUDA_OCC_ERROR_UNKNOWN_DEVICE requested device is not supported in
|
| 187 |
+
* current implementation or device is invalid
|
| 188 |
+
*
|
| 189 |
+
*/
|
| 190 |
+
static __OCC_INLINE
|
| 191 |
+
cudaOccError cudaOccMaxPotentialOccupancyBlockSize(
|
| 192 |
+
int *minGridSize, // out
|
| 193 |
+
int *blockSize, // out
|
| 194 |
+
const cudaOccDeviceProp *properties, // in
|
| 195 |
+
const cudaOccFuncAttributes *attributes, // in
|
| 196 |
+
const cudaOccDeviceState *state, // in
|
| 197 |
+
size_t (*blockSizeToDynamicSMemSize)(int), // in
|
| 198 |
+
size_t dynamicSMemSize); // in
|
| 199 |
+
|
| 200 |
+
/**
|
| 201 |
+
* The CUDA launch configurator C++ API suggests a grid / block size pair (in
|
| 202 |
+
* minGridSize and blockSize) that achieves the best potential occupancy
|
| 203 |
+
* (i.e. the maximum number of active warps with the smallest number of blocks)
|
| 204 |
+
* for the given function described by attributes, on a device described by
|
| 205 |
+
* properties with settings in state.
|
| 206 |
+
*
|
| 207 |
+
* If per-block dynamic shared memory allocation is 0 or constant regardless of
|
| 208 |
+
* block size, the user can use cudaOccMaxPotentialOccupancyBlockSize to
|
| 209 |
+
* configure the launch. A constant dynamic shared memory allocation size in
|
| 210 |
+
* bytes can be passed through dynamicSMemSize.
|
| 211 |
+
*
|
| 212 |
+
* Otherwise, if the per-block dynamic shared memory size varies with different
|
| 213 |
+
* block sizes, the user needs to use
|
| 214 |
+
* cudaOccMaxPotentialOccupancyBlockSizeVariableSMem instead, and provide a
|
| 215 |
+
* functor / pointer to a unary function (blockSizeToDynamicSMemSize) that
|
| 216 |
+
* computes the dynamic shared memory needed by func for any given block
|
| 217 |
+
* size. An example signature is:
|
| 218 |
+
*
|
| 219 |
+
* // Take block size, returns per-block dynamic shared memory needed
|
| 220 |
+
* size_t blockToSmem(int blockSize);
|
| 221 |
+
*
|
| 222 |
+
* RETURN VALUE
|
| 223 |
+
*
|
| 224 |
+
* The suggested block size and the minimum number of blocks needed to achieve
|
| 225 |
+
* the maximum occupancy are returned through blockSize and minGridSize.
|
| 226 |
+
*
|
| 227 |
+
* If *blockSize is 0, then the given combination cannot run on the device.
|
| 228 |
+
*
|
| 229 |
+
* ERRORS
|
| 230 |
+
*
|
| 231 |
+
* CUDA_OCC_ERROR_INVALID_INPUT input parameter is invalid.
|
| 232 |
+
* CUDA_OCC_ERROR_UNKNOWN_DEVICE requested device is not supported in
|
| 233 |
+
* current implementation or device is invalid
|
| 234 |
+
*
|
| 235 |
+
*/
|
| 236 |
+
|
| 237 |
+
#if defined(__cplusplus)
|
| 238 |
+
namespace {
|
| 239 |
+
|
| 240 |
+
__OCC_INLINE
|
| 241 |
+
cudaOccError cudaOccMaxPotentialOccupancyBlockSize(
|
| 242 |
+
int *minGridSize, // out
|
| 243 |
+
int *blockSize, // out
|
| 244 |
+
const cudaOccDeviceProp *properties, // in
|
| 245 |
+
const cudaOccFuncAttributes *attributes, // in
|
| 246 |
+
const cudaOccDeviceState *state, // in
|
| 247 |
+
size_t dynamicSMemSize = 0); // in
|
| 248 |
+
|
| 249 |
+
template <typename UnaryFunction>
|
| 250 |
+
__OCC_INLINE
|
| 251 |
+
cudaOccError cudaOccMaxPotentialOccupancyBlockSizeVariableSMem(
|
| 252 |
+
int *minGridSize, // out
|
| 253 |
+
int *blockSize, // out
|
| 254 |
+
const cudaOccDeviceProp *properties, // in
|
| 255 |
+
const cudaOccFuncAttributes *attributes, // in
|
| 256 |
+
const cudaOccDeviceState *state, // in
|
| 257 |
+
UnaryFunction blockSizeToDynamicSMemSize); // in
|
| 258 |
+
|
| 259 |
+
} // namespace anonymous
|
| 260 |
+
#endif // defined(__cplusplus)
|
| 261 |
+
|
| 262 |
+
/**
|
| 263 |
+
*
|
| 264 |
+
* The CUDA dynamic shared memory calculator computes the maximum size of
|
| 265 |
+
* per-block dynamic shared memory if we want to place numBlocks blocks
|
| 266 |
+
* on an SM.
|
| 267 |
+
*
|
| 268 |
+
* RETURN VALUE
|
| 269 |
+
*
|
| 270 |
+
* Returns in *dynamicSmemSize the maximum size of dynamic shared memory to allow
|
| 271 |
+
* numBlocks blocks per SM.
|
| 272 |
+
*
|
| 273 |
+
* ERRORS
|
| 274 |
+
*
|
| 275 |
+
* CUDA_OCC_ERROR_INVALID_INPUT input parameter is invalid.
|
| 276 |
+
* CUDA_OCC_ERROR_UNKNOWN_DEVICE requested device is not supported in
|
| 277 |
+
* current implementation or device is invalid
|
| 278 |
+
*
|
| 279 |
+
*/
|
| 280 |
+
static __OCC_INLINE
|
| 281 |
+
cudaOccError cudaOccAvailableDynamicSMemPerBlock(
|
| 282 |
+
size_t *dynamicSmemSize,
|
| 283 |
+
const cudaOccDeviceProp *properties,
|
| 284 |
+
const cudaOccFuncAttributes *attributes,
|
| 285 |
+
const cudaOccDeviceState *state,
|
| 286 |
+
int numBlocks,
|
| 287 |
+
int blockSize);
|
| 288 |
+
|
| 289 |
+
/**
|
| 290 |
+
* Data structures
|
| 291 |
+
*
|
| 292 |
+
* These structures are subject to change for future architecture and CUDA
|
| 293 |
+
* releases. C users should initialize the structure as {0}.
|
| 294 |
+
*
|
| 295 |
+
*/
|
| 296 |
+
|
| 297 |
+
/**
|
| 298 |
+
* Device descriptor
|
| 299 |
+
*
|
| 300 |
+
* This structure describes a device.
|
| 301 |
+
*/
|
| 302 |
+
struct cudaOccDeviceProp {
    // Compute capability major version
    int computeMajor;

    // Compute capability minor version. An unsupported minor version may
    // cause an error
    int computeMinor;

    // Maximum number of threads per block
    int maxThreadsPerBlock;

    // Maximum number of threads per SM, i.e. (max. number of warps) x
    // (warp size)
    int maxThreadsPerMultiprocessor;

    // Maximum number of registers per block
    int regsPerBlock;

    // Maximum number of registers per SM
    int regsPerMultiprocessor;

    // Warp size
    int warpSize;

    // Maximum shared memory size per block
    size_t sharedMemPerBlock;

    // Maximum shared memory size per SM
    size_t sharedMemPerMultiprocessor;

    // Number of SMs available
    int numSms;

    // Maximum opt-in shared memory size per block
    size_t sharedMemPerBlockOptin;

    // Shared memory per block reserved by the driver
    size_t reservedSharedMemPerBlock;

#ifdef __cplusplus
    // Converting constructor: lets a cudaOccDeviceProp be initialized from a
    // cudaDeviceProp structure (or any type exposing the same member names)
    // when this header is used inside a CUDA application.
    //
    // An application with access to the CUDA Runtime API can query a device
    // through cudaGetDeviceProperties and hand the result straight to the
    // occupancy calculator.
    //
    // Example:
    /*
     {
         cudaDeviceProp prop;

         cudaGetDeviceProperties(&prop, ...);

         cudaOccDeviceProp occProp = prop;

         ...

         cudaOccMaxPotentialOccupancyBlockSize(..., &occProp, ...);
     }
     */
    //
    template<typename DeviceProp>
    __OCC_INLINE
    cudaOccDeviceProp(const DeviceProp &props)
        : computeMajor(props.major),
          computeMinor(props.minor),
          maxThreadsPerBlock(props.maxThreadsPerBlock),
          maxThreadsPerMultiprocessor(props.maxThreadsPerMultiProcessor),
          regsPerBlock(props.regsPerBlock),
          regsPerMultiprocessor(props.regsPerMultiprocessor),
          warpSize(props.warpSize),
          sharedMemPerBlock(props.sharedMemPerBlock),
          sharedMemPerMultiprocessor(props.sharedMemPerMultiprocessor),
          numSms(props.multiProcessorCount),
          sharedMemPerBlockOptin(props.sharedMemPerBlockOptin),
          reservedSharedMemPerBlock(props.reservedSharedMemPerBlock)
    {}

    // Default constructor: every field starts at zero.
    __OCC_INLINE
    cudaOccDeviceProp()
        : computeMajor(0),
          computeMinor(0),
          maxThreadsPerBlock(0),
          maxThreadsPerMultiprocessor(0),
          regsPerBlock(0),
          regsPerMultiprocessor(0),
          warpSize(0),
          sharedMemPerBlock(0),
          sharedMemPerMultiprocessor(0),
          numSms(0),
          sharedMemPerBlockOptin(0),
          reservedSharedMemPerBlock(0)
    {}
#endif // __cplusplus
};
|
| 378 |
+
|
| 379 |
+
/**
|
| 380 |
+
* Partitioned global caching option
|
| 381 |
+
*/
|
| 382 |
+
typedef enum cudaOccPartitionedGCConfig_enum {
    // Disable partitioned global caching
    PARTITIONED_GC_OFF = 0,

    // Prefer partitioned global caching
    PARTITIONED_GC_ON = 1,

    // Force partitioned global caching
    PARTITIONED_GC_ON_STRICT = 2
} cudaOccPartitionedGCConfig;
|
| 387 |
+
|
| 388 |
+
/**
|
| 389 |
+
* Per function opt in maximum dynamic shared memory limit
|
| 390 |
+
*/
|
| 391 |
+
typedef enum cudaOccFuncShmemConfig_enum {
    // Default shared memory limit
    FUNC_SHMEM_LIMIT_DEFAULT = 0,

    // Use the opt-in shared memory limit
    FUNC_SHMEM_LIMIT_OPTIN = 1
} cudaOccFuncShmemConfig;
|
| 395 |
+
|
| 396 |
+
/**
|
| 397 |
+
* Function descriptor
|
| 398 |
+
*
|
| 399 |
+
* This structure describes a CUDA function.
|
| 400 |
+
*/
|
| 401 |
+
struct cudaOccFuncAttributes {
|
| 402 |
+
int maxThreadsPerBlock; // Maximum block size the function can work with. If
|
| 403 |
+
// unlimited, use INT_MAX or any value greater than
|
| 404 |
+
// or equal to maxThreadsPerBlock of the device
|
| 405 |
+
int numRegs; // Number of registers used. When the function is
|
| 406 |
+
// launched on device, the register count may change
|
| 407 |
+
// due to internal tools requirements.
|
| 408 |
+
size_t sharedSizeBytes; // Number of static shared memory used
|
| 409 |
+
|
| 410 |
+
cudaOccPartitionedGCConfig partitionedGCConfig;
|
| 411 |
+
// Partitioned global caching is required to enable
|
| 412 |
+
// caching on certain chips, such as sm_52
|
| 413 |
+
// devices. Partitioned global caching can be
|
| 414 |
+
// automatically disabled if the occupancy
|
| 415 |
+
// requirement of the launch cannot support caching.
|
| 416 |
+
//
|
| 417 |
+
// To override this behavior with caching on and
|
| 418 |
+
// calculate occupancy strictly according to the
|
| 419 |
+
// preference, set partitionedGCConfig to
|
| 420 |
+
// PARTITIONED_GC_ON_STRICT. This is especially
|
| 421 |
+
// useful for experimenting and finding launch
|
| 422 |
+
// configurations (MaxPotentialOccupancyBlockSize)
|
| 423 |
+
// that allow global caching to take effect.
|
| 424 |
+
//
|
| 425 |
+
// This flag only affects the occupancy calculation.
|
| 426 |
+
|
| 427 |
+
cudaOccFuncShmemConfig shmemLimitConfig;
|
| 428 |
+
// Certain chips like sm_70 allow a user to opt into
|
| 429 |
+
// a higher per block limit of dynamic shared memory
|
| 430 |
+
// This optin is performed on a per function basis
|
| 431 |
+
// using the cuFuncSetAttribute function
|
| 432 |
+
|
| 433 |
+
size_t maxDynamicSharedSizeBytes;
|
| 434 |
+
// User set limit on maximum dynamic shared memory
|
| 435 |
+
// usable by the kernel
|
| 436 |
+
// This limit is set using the cuFuncSetAttribute
|
| 437 |
+
// function.
|
| 438 |
+
|
| 439 |
+
int numBlockBarriers; // Number of block barriers used (default to 1)
|
| 440 |
+
#ifdef __cplusplus
|
| 441 |
+
// This structure can be converted from a cudaFuncAttributes structure for
|
| 442 |
+
// users that use this header in their CUDA applications.
|
| 443 |
+
//
|
| 444 |
+
// If the application have access to the CUDA Runtime API, the application
|
| 445 |
+
// can obtain the function attributes of a CUDA kernel function through
|
| 446 |
+
// cudaFuncGetAttributes, and initialize a cudaOccFuncAttributes with the
|
| 447 |
+
// cudaFuncAttributes structure.
|
| 448 |
+
//
|
| 449 |
+
// Example:
|
| 450 |
+
/*
|
| 451 |
+
__global__ void foo() {...}
|
| 452 |
+
|
| 453 |
+
...
|
| 454 |
+
|
| 455 |
+
{
|
| 456 |
+
cudaFuncAttributes attr;
|
| 457 |
+
|
| 458 |
+
cudaFuncGetAttributes(&attr, foo);
|
| 459 |
+
|
| 460 |
+
cudaOccFuncAttributes occAttr = attr;
|
| 461 |
+
|
| 462 |
+
...
|
| 463 |
+
|
| 464 |
+
cudaOccMaxPotentialOccupancyBlockSize(..., &occAttr, ...);
|
| 465 |
+
}
|
| 466 |
+
*/
|
| 467 |
+
//
|
| 468 |
+
template<typename FuncAttributes>
|
| 469 |
+
__OCC_INLINE
|
| 470 |
+
cudaOccFuncAttributes(const FuncAttributes &attr)
|
| 471 |
+
: maxThreadsPerBlock (attr.maxThreadsPerBlock),
|
| 472 |
+
numRegs (attr.numRegs),
|
| 473 |
+
sharedSizeBytes (attr.sharedSizeBytes),
|
| 474 |
+
partitionedGCConfig (PARTITIONED_GC_OFF),
|
| 475 |
+
shmemLimitConfig (FUNC_SHMEM_LIMIT_OPTIN),
|
| 476 |
+
maxDynamicSharedSizeBytes (attr.maxDynamicSharedSizeBytes),
|
| 477 |
+
numBlockBarriers (1)
|
| 478 |
+
{}
|
| 479 |
+
|
| 480 |
+
__OCC_INLINE
|
| 481 |
+
cudaOccFuncAttributes()
|
| 482 |
+
: maxThreadsPerBlock (0),
|
| 483 |
+
numRegs (0),
|
| 484 |
+
sharedSizeBytes (0),
|
| 485 |
+
partitionedGCConfig (PARTITIONED_GC_OFF),
|
| 486 |
+
shmemLimitConfig (FUNC_SHMEM_LIMIT_DEFAULT),
|
| 487 |
+
maxDynamicSharedSizeBytes (0),
|
| 488 |
+
numBlockBarriers (0)
|
| 489 |
+
{}
|
| 490 |
+
#endif
|
| 491 |
+
};
|
| 492 |
+
|
| 493 |
+
/**
 * Cache / shared memory split preference
 */
typedef enum cudaOccCacheConfig_enum {
    // No preference for shared memory or L1 (default)
    CACHE_PREFER_NONE = 0,

    // Prefer larger shared memory and smaller L1 cache
    CACHE_PREFER_SHARED = 1,

    // Prefer larger L1 cache and smaller shared memory
    CACHE_PREFER_L1 = 2,

    // Prefer equal sized L1 cache and shared memory
    CACHE_PREFER_EQUAL = 3
} cudaOccCacheConfig;
|
| 499 |
+
|
| 500 |
+
/**
 * Shared memory carveout preference
 */
typedef enum cudaOccCarveoutConfig_enum {
    // No preference for shared memory or L1 (default)
    SHAREDMEM_CARVEOUT_DEFAULT = -1,

    // Prefer maximum available shared memory, minimum L1 cache
    SHAREDMEM_CARVEOUT_MAX_SHARED = 100,

    // Prefer maximum available L1 cache, minimum shared memory
    SHAREDMEM_CARVEOUT_MAX_L1 = 0,

    // Prefer half of maximum available shared memory, with the rest as L1
    // cache
    SHAREDMEM_CARVEOUT_HALF = 50
} cudaOccCarveoutConfig;
|
| 506 |
+
|
| 507 |
+
/**
|
| 508 |
+
* Device state descriptor
|
| 509 |
+
*
|
| 510 |
+
* This structure describes device settings that affect occupancy calculation.
|
| 511 |
+
*/
|
| 512 |
+
struct cudaOccDeviceState
|
| 513 |
+
{
|
| 514 |
+
// Cache / shared memory split preference. Deprecated on Volta
|
| 515 |
+
cudaOccCacheConfig cacheConfig;
|
| 516 |
+
// Shared memory / L1 split preference. Supported on only Volta
|
| 517 |
+
int carveoutConfig;
|
| 518 |
+
|
| 519 |
+
#ifdef __cplusplus
|
| 520 |
+
__OCC_INLINE
|
| 521 |
+
cudaOccDeviceState()
|
| 522 |
+
: cacheConfig (CACHE_PREFER_NONE),
|
| 523 |
+
carveoutConfig (SHAREDMEM_CARVEOUT_DEFAULT)
|
| 524 |
+
{}
|
| 525 |
+
#endif
|
| 526 |
+
};
|
| 527 |
+
|
| 528 |
+
/**
 * Bit flags naming the resources that limited occupancy
 */
typedef enum cudaOccLimitingFactor_enum {
    // Occupancy limited due to warps available
    OCC_LIMIT_WARPS = 1 << 0,

    // Occupancy limited due to registers available
    OCC_LIMIT_REGISTERS = 1 << 1,

    // Occupancy limited due to shared memory available
    OCC_LIMIT_SHARED_MEMORY = 1 << 2,

    // Occupancy limited due to blocks available
    OCC_LIMIT_BLOCKS = 1 << 3,

    // Occupancy limited due to barriers available
    OCC_LIMIT_BARRIERS = 1 << 4
} cudaOccLimitingFactor;
|
| 536 |
+
|
| 537 |
+
/**
|
| 538 |
+
* Occupancy output
|
| 539 |
+
*
|
| 540 |
+
* This structure contains occupancy calculator's output.
|
| 541 |
+
*/
|
| 542 |
+
struct cudaOccResult {
|
| 543 |
+
int activeBlocksPerMultiprocessor; // Occupancy
|
| 544 |
+
unsigned int limitingFactors; // Factors that limited occupancy. A bit
|
| 545 |
+
// field that counts the limiting
|
| 546 |
+
// factors, see cudaOccLimitingFactor
|
| 547 |
+
int blockLimitRegs; // Occupancy due to register
|
| 548 |
+
// usage, INT_MAX if the kernel does not
|
| 549 |
+
// use any register.
|
| 550 |
+
int blockLimitSharedMem; // Occupancy due to shared memory
|
| 551 |
+
// usage, INT_MAX if the kernel does not
|
| 552 |
+
// use shared memory.
|
| 553 |
+
int blockLimitWarps; // Occupancy due to block size limit
|
| 554 |
+
int blockLimitBlocks; // Occupancy due to maximum number of blocks
|
| 555 |
+
// managable per SM
|
| 556 |
+
int blockLimitBarriers; // Occupancy due to block barrier usage
|
| 557 |
+
int allocatedRegistersPerBlock; // Actual number of registers allocated per
|
| 558 |
+
// block
|
| 559 |
+
size_t allocatedSharedMemPerBlock; // Actual size of shared memory allocated
|
| 560 |
+
// per block
|
| 561 |
+
cudaOccPartitionedGCConfig partitionedGCConfig;
|
| 562 |
+
// Report if partitioned global caching
|
| 563 |
+
// is actually enabled.
|
| 564 |
+
};
|
| 565 |
+
|
| 566 |
+
/**
|
| 567 |
+
* Partitioned global caching support
|
| 568 |
+
*
|
| 569 |
+
* See cudaOccPartitionedGlobalCachingModeSupport
|
| 570 |
+
*/
|
| 571 |
+
typedef enum cudaOccPartitionedGCSupport_enum {
    // Partitioned global caching is not supported
    PARTITIONED_GC_NOT_SUPPORTED = 0,

    // Partitioned global caching is supported
    PARTITIONED_GC_SUPPORTED = 1
} cudaOccPartitionedGCSupport;
|
| 575 |
+
|
| 576 |
+
/**
|
| 577 |
+
* Implementation
|
| 578 |
+
*/
|
| 579 |
+
|
| 580 |
+
/**
|
| 581 |
+
* Max compute capability supported
|
| 582 |
+
*/
|
| 583 |
+
|
| 584 |
+
#define __CUDA_OCC_MAJOR__ 12
|
| 585 |
+
#define __CUDA_OCC_MINOR__ 0
|
| 586 |
+
|
| 587 |
+
//////////////////////////////////////////
|
| 588 |
+
// Mathematical Helper Functions //
|
| 589 |
+
//////////////////////////////////////////
|
| 590 |
+
|
| 591 |
+
/**
 * Returns the smaller of lhs and rhs (lhs when the two are equal).
 */
static __OCC_INLINE int __occMin(int lhs, int rhs)
{
    if (rhs < lhs) {
        return rhs;
    }

    return lhs;
}
|
| 595 |
+
|
| 596 |
+
/**
 * Integer division of x by y after biasing x by (y - 1); for non-negative x
 * and positive y this is the quotient rounded up.
 */
static __OCC_INLINE int __occDivideRoundUp(int x, int y)
{
    // Adding (y - 1) before the truncating division pushes any partial
    // quotient up to the next integer.
    const int bias = y - 1;

    return (x + bias) / y;
}
|
| 600 |
+
|
| 601 |
+
/**
 * Returns y multiplied by __occDivideRoundUp(x, y), i.e. x rounded up to a
 * multiple of y for non-negative x and positive y.
 */
static __OCC_INLINE int __occRoundUp(int x, int y)
{
    const int multiples = __occDivideRoundUp(x, y);

    return y * multiples;
}
|
| 605 |
+
|
| 606 |
+
//////////////////////////////////////////
|
| 607 |
+
// Architectural Properties //
|
| 608 |
+
//////////////////////////////////////////
|
| 609 |
+
|
| 610 |
+
/**
|
| 611 |
+
* Granularity of shared memory allocation
|
| 612 |
+
*/
|
| 613 |
+
static __OCC_INLINE cudaOccError cudaOccSMemAllocationGranularity(int *limit, const cudaOccDeviceProp *properties)
{
    // The granularity depends only on the compute capability major version.
    // *limit is left untouched when the major version is not recognized.
    const int major = properties->computeMajor;

    if (major == 3 || major == 5 || major == 6 || major == 7) {
        *limit = 256;
        return CUDA_OCC_SUCCESS;
    }

    if (major == 8 || major == 9 || major == 10 || major == 12) {
        *limit = 128;
        return CUDA_OCC_SUCCESS;
    }

    return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
}
|
| 638 |
+
|
| 639 |
+
/**
|
| 640 |
+
* Maximum number of registers per thread
|
| 641 |
+
*/
|
| 642 |
+
static __OCC_INLINE cudaOccError cudaOccRegAllocationMaxPerThread(int *limit, const cudaOccDeviceProp *properties)
{
    // Selected by compute capability major version: 255 for 3.x, 5.x and
    // 6.x; 256 for 7.x, 8.x, 9.x, 10.x and 12.x. *limit is left untouched
    // when the major version is not recognized.
    const int major = properties->computeMajor;
    int maxRegs;

    if (major == 3 || major == 5 || major == 6) {
        maxRegs = 255;
    }
    else if (major == 7 || major == 8 || major == 9 || major == 10 || major == 12) {
        maxRegs = 256;
    }
    else {
        return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
    }

    *limit = maxRegs;

    return CUDA_OCC_SUCCESS;
}
|
| 667 |
+
|
| 668 |
+
/**
|
| 669 |
+
* Granularity of register allocation
|
| 670 |
+
*/
|
| 671 |
+
static __OCC_INLINE cudaOccError cudaOccRegAllocationGranularity(int *limit, const cudaOccDeviceProp *properties)
{
    // Every recognized compute capability major version (3, 5, 6, 7, 8, 9,
    // 10 and 12) shares the same granularity; anything else is rejected
    // without touching *limit.
    const int major = properties->computeMajor;
    const int isKnownMajor = (major == 3)
                          || (major >= 5 && major <= 10)
                          || (major == 12);

    if (!isKnownMajor) {
        return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
    }

    *limit = 256;

    return CUDA_OCC_SUCCESS;
}
|
| 694 |
+
|
| 695 |
+
/**
|
| 696 |
+
* Number of sub-partitions
|
| 697 |
+
*/
|
| 698 |
+
static __OCC_INLINE cudaOccError cudaOccSubPartitionsPerMultiprocessor(int *limit, const cudaOccDeviceProp *properties)
{
    const int major = properties->computeMajor;
    int subPartitions;

    if (major == 6) {
        // Compute capability 6.0 has 2 sub-partitions; every other 6.x
        // minor version has 4.
        if (properties->computeMinor) {
            subPartitions = 4;
        }
        else {
            subPartitions = 2;
        }
    }
    else if (major == 3 || major == 5 || major == 7 || major == 8 ||
             major == 9 || major == 10 || major == 12) {
        subPartitions = 4;
    }
    else {
        // Unrecognized major version: *limit is left untouched.
        return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
    }

    *limit = subPartitions;

    return CUDA_OCC_SUCCESS;
}
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
/**
|
| 726 |
+
* Maximum number of blocks that can run simultaneously on a multiprocessor
|
| 727 |
+
*/
|
| 728 |
+
static __OCC_INLINE cudaOccError cudaOccMaxBlocksPerMultiprocessor(int* limit, const cudaOccDeviceProp *properties)
{
    const int major = properties->computeMajor;
    const int minor = properties->computeMinor;
    int maxBlocks;

    if (major == 3) {
        maxBlocks = 16;
    }
    else if (major == 5 || major == 6 || major == 9) {
        maxBlocks = 32;
    }
    else if (major == 7) {
        // Compute capability 7.5 (Turing) is limited to 16 blocks; other
        // 7.x devices allow 32.
        maxBlocks = (minor == 5) ? 16 : 32;
    }
    else if (major == 8) {
        // 8.0 allows 32 blocks, 8.9 allows 24, every other 8.x allows 16.
        if (minor == 0) {
            maxBlocks = 32;
        }
        else if (minor == 9) {
            maxBlocks = 24;
        }
        else {
            maxBlocks = 16;
        }
    }
    else if (major == 10) {
        // 10.1 allows 24 blocks; 10.0 and every other 10.x allow 32.
        maxBlocks = (minor == 1) ? 24 : 32;
    }
    else if (major == 12) {
        maxBlocks = 24;
    }
    else {
        // Unrecognized major version: *limit is left untouched.
        return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
    }

    *limit = maxBlocks;

    return CUDA_OCC_SUCCESS;
}
|
| 780 |
+
|
| 781 |
+
/**
|
| 782 |
+
* Align up shared memory based on compute major configurations
|
| 783 |
+
*/
|
| 784 |
+
static __OCC_INLINE cudaOccError cudaOccAlignUpShmemSizeVoltaPlus(size_t *shMemSize, const cudaOccDeviceProp *properties)
|
| 785 |
+
{
|
| 786 |
+
// Volta and Turing have shared L1 cache / shared memory, and support cache
|
| 787 |
+
// configuration to trade one for the other. These values are needed to
|
| 788 |
+
// map carveout config ratio to the next available architecture size
|
| 789 |
+
size_t size = *shMemSize;
|
| 790 |
+
|
| 791 |
+
switch (properties->computeMajor) {
|
| 792 |
+
case 7: {
|
| 793 |
+
// Turing supports 32KB and 64KB shared mem.
|
| 794 |
+
int isTuring = properties->computeMinor == 5;
|
| 795 |
+
if (isTuring) {
|
| 796 |
+
if (size <= 32 * 1024) {
|
| 797 |
+
*shMemSize = 32 * 1024;
|
| 798 |
+
}
|
| 799 |
+
else if (size <= 64 * 1024) {
|
| 800 |
+
*shMemSize = 64 * 1024;
|
| 801 |
+
}
|
| 802 |
+
else {
|
| 803 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 804 |
+
}
|
| 805 |
+
}
|
| 806 |
+
// Volta supports 0KB, 8KB, 16KB, 32KB, 64KB, and 96KB shared mem.
|
| 807 |
+
else {
|
| 808 |
+
if (size == 0) {
|
| 809 |
+
*shMemSize = 0;
|
| 810 |
+
}
|
| 811 |
+
else if (size <= 8 * 1024) {
|
| 812 |
+
*shMemSize = 8 * 1024;
|
| 813 |
+
}
|
| 814 |
+
else if (size <= 16 * 1024) {
|
| 815 |
+
*shMemSize = 16 * 1024;
|
| 816 |
+
}
|
| 817 |
+
else if (size <= 32 * 1024) {
|
| 818 |
+
*shMemSize = 32 * 1024;
|
| 819 |
+
}
|
| 820 |
+
else if (size <= 64 * 1024) {
|
| 821 |
+
*shMemSize = 64 * 1024;
|
| 822 |
+
}
|
| 823 |
+
else if (size <= 96 * 1024) {
|
| 824 |
+
*shMemSize = 96 * 1024;
|
| 825 |
+
}
|
| 826 |
+
else {
|
| 827 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 828 |
+
}
|
| 829 |
+
}
|
| 830 |
+
break;
|
| 831 |
+
}
|
| 832 |
+
case 8:
|
| 833 |
+
if (properties->computeMinor == 0 || properties->computeMinor == 7) {
|
| 834 |
+
if (size == 0) {
|
| 835 |
+
*shMemSize = 0;
|
| 836 |
+
}
|
| 837 |
+
else if (size <= 8 * 1024) {
|
| 838 |
+
*shMemSize = 8 * 1024;
|
| 839 |
+
}
|
| 840 |
+
else if (size <= 16 * 1024) {
|
| 841 |
+
*shMemSize = 16 * 1024;
|
| 842 |
+
}
|
| 843 |
+
else if (size <= 32 * 1024) {
|
| 844 |
+
*shMemSize = 32 * 1024;
|
| 845 |
+
}
|
| 846 |
+
else if (size <= 64 * 1024) {
|
| 847 |
+
*shMemSize = 64 * 1024;
|
| 848 |
+
}
|
| 849 |
+
else if (size <= 100 * 1024) {
|
| 850 |
+
*shMemSize = 100 * 1024;
|
| 851 |
+
}
|
| 852 |
+
else if (size <= 132 * 1024) {
|
| 853 |
+
*shMemSize = 132 * 1024;
|
| 854 |
+
}
|
| 855 |
+
else if (size <= 164 * 1024) {
|
| 856 |
+
*shMemSize = 164 * 1024;
|
| 857 |
+
}
|
| 858 |
+
else {
|
| 859 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 860 |
+
}
|
| 861 |
+
}
|
| 862 |
+
else {
|
| 863 |
+
if (size == 0) {
|
| 864 |
+
*shMemSize = 0;
|
| 865 |
+
}
|
| 866 |
+
else if (size <= 8 * 1024) {
|
| 867 |
+
*shMemSize = 8 * 1024;
|
| 868 |
+
}
|
| 869 |
+
else if (size <= 16 * 1024) {
|
| 870 |
+
*shMemSize = 16 * 1024;
|
| 871 |
+
}
|
| 872 |
+
else if (size <= 32 * 1024) {
|
| 873 |
+
*shMemSize = 32 * 1024;
|
| 874 |
+
}
|
| 875 |
+
else if (size <= 64 * 1024) {
|
| 876 |
+
*shMemSize = 64 * 1024;
|
| 877 |
+
}
|
| 878 |
+
else if (size <= 100 * 1024) {
|
| 879 |
+
*shMemSize = 100 * 1024;
|
| 880 |
+
}
|
| 881 |
+
else {
|
| 882 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 883 |
+
}
|
| 884 |
+
}
|
| 885 |
+
break;
|
| 886 |
+
case 9: {
|
| 887 |
+
if (size == 0) {
|
| 888 |
+
*shMemSize = 0;
|
| 889 |
+
}
|
| 890 |
+
else if (size <= 8 * 1024) {
|
| 891 |
+
*shMemSize = 8 * 1024;
|
| 892 |
+
}
|
| 893 |
+
else if (size <= 16 * 1024) {
|
| 894 |
+
*shMemSize = 16 * 1024;
|
| 895 |
+
}
|
| 896 |
+
else if (size <= 32 * 1024) {
|
| 897 |
+
*shMemSize = 32 * 1024;
|
| 898 |
+
}
|
| 899 |
+
else if (size <= 64 * 1024) {
|
| 900 |
+
*shMemSize = 64 * 1024;
|
| 901 |
+
}
|
| 902 |
+
else if (size <= 100 * 1024) {
|
| 903 |
+
*shMemSize = 100 * 1024;
|
| 904 |
+
}
|
| 905 |
+
else if (size <= 132 * 1024) {
|
| 906 |
+
*shMemSize = 132 * 1024;
|
| 907 |
+
}
|
| 908 |
+
else if (size <= 164 * 1024) {
|
| 909 |
+
*shMemSize = 164 * 1024;
|
| 910 |
+
}
|
| 911 |
+
else if (size <= 196 * 1024) {
|
| 912 |
+
*shMemSize = 196 * 1024;
|
| 913 |
+
}
|
| 914 |
+
else if (size <= 228 * 1024) {
|
| 915 |
+
*shMemSize = 228 * 1024;
|
| 916 |
+
}
|
| 917 |
+
else {
|
| 918 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 919 |
+
}
|
| 920 |
+
break;
|
| 921 |
+
}
|
| 922 |
+
case 10: {
|
| 923 |
+
switch (properties->computeMinor) {
|
| 924 |
+
// GB10x GPUs in Blackwell family have the below compute minors and corresponding
|
| 925 |
+
// shared memory configs
|
| 926 |
+
case 0:
|
| 927 |
+
case 1:
|
| 928 |
+
if (size == 0) {
|
| 929 |
+
*shMemSize = 0;
|
| 930 |
+
}
|
| 931 |
+
else if (size <= 8 * 1024) {
|
| 932 |
+
*shMemSize = 8 * 1024;
|
| 933 |
+
}
|
| 934 |
+
else if (size <= 16 * 1024) {
|
| 935 |
+
*shMemSize = 16 * 1024;
|
| 936 |
+
}
|
| 937 |
+
else if (size <= 32 * 1024) {
|
| 938 |
+
*shMemSize = 32 * 1024;
|
| 939 |
+
}
|
| 940 |
+
else if (size <= 64 * 1024) {
|
| 941 |
+
*shMemSize = 64 * 1024;
|
| 942 |
+
}
|
| 943 |
+
else if (size <= 100 * 1024) {
|
| 944 |
+
*shMemSize = 100 * 1024;
|
| 945 |
+
}
|
| 946 |
+
else if (size <= 132 * 1024) {
|
| 947 |
+
*shMemSize = 132 * 1024;
|
| 948 |
+
}
|
| 949 |
+
else if (size <= 164 * 1024) {
|
| 950 |
+
*shMemSize = 164 * 1024;
|
| 951 |
+
}
|
| 952 |
+
else if (size <= 196 * 1024) {
|
| 953 |
+
*shMemSize = 196 * 1024;
|
| 954 |
+
}
|
| 955 |
+
else if (size <= 228 * 1024) {
|
| 956 |
+
*shMemSize = 228 * 1024;
|
| 957 |
+
}
|
| 958 |
+
else {
|
| 959 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 960 |
+
}
|
| 961 |
+
break;
|
| 962 |
+
default:
|
| 963 |
+
return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
|
| 964 |
+
}
|
| 965 |
+
break;
|
| 966 |
+
}
|
| 967 |
+
case 12: {
|
| 968 |
+
switch (properties->computeMinor) {
|
| 969 |
+
case 0:
|
| 970 |
+
if (size == 0) {
|
| 971 |
+
*shMemSize = 0;
|
| 972 |
+
}
|
| 973 |
+
else if (size <= 8 * 1024) {
|
| 974 |
+
*shMemSize = 8 * 1024;
|
| 975 |
+
}
|
| 976 |
+
else if (size <= 16 * 1024) {
|
| 977 |
+
*shMemSize = 16 * 1024;
|
| 978 |
+
}
|
| 979 |
+
else if (size <= 32 * 1024) {
|
| 980 |
+
*shMemSize = 32 * 1024;
|
| 981 |
+
}
|
| 982 |
+
else if (size <= 64 * 1024) {
|
| 983 |
+
*shMemSize = 64 * 1024;
|
| 984 |
+
}
|
| 985 |
+
else if (size <= 100 * 1024) {
|
| 986 |
+
*shMemSize = 100 * 1024;
|
| 987 |
+
}
|
| 988 |
+
else {
|
| 989 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 990 |
+
}
|
| 991 |
+
break;
|
| 992 |
+
default:
|
| 993 |
+
return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
|
| 994 |
+
}
|
| 995 |
+
break;
|
| 996 |
+
}
|
| 997 |
+
break;
|
| 998 |
+
default:
|
| 999 |
+
return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
|
| 1000 |
+
}
|
| 1001 |
+
|
| 1002 |
+
return CUDA_OCC_SUCCESS;
|
| 1003 |
+
}
|
| 1004 |
+
|
| 1005 |
+
/**
|
| 1006 |
+
* Shared memory based on the new carveoutConfig API introduced with Volta
|
| 1007 |
+
*/
|
| 1008 |
+
static __OCC_INLINE cudaOccError cudaOccSMemPreferenceVoltaPlus(size_t *limit, const cudaOccDeviceProp *properties, const cudaOccDeviceState *state)
|
| 1009 |
+
{
|
| 1010 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1011 |
+
size_t preferenceShmemSize;
|
| 1012 |
+
|
| 1013 |
+
// CUDA 9.0 introduces a new API to set shared memory - L1 configuration on supported
|
| 1014 |
+
// devices. This preference will take precedence over the older cacheConfig setting.
|
| 1015 |
+
// Map cacheConfig to its effective preference value.
|
| 1016 |
+
int effectivePreference = state->carveoutConfig;
|
| 1017 |
+
if ((effectivePreference < SHAREDMEM_CARVEOUT_DEFAULT) || (effectivePreference > SHAREDMEM_CARVEOUT_MAX_SHARED)) {
|
| 1018 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 1019 |
+
}
|
| 1020 |
+
|
| 1021 |
+
if (effectivePreference == SHAREDMEM_CARVEOUT_DEFAULT) {
|
| 1022 |
+
switch (state->cacheConfig)
|
| 1023 |
+
{
|
| 1024 |
+
case CACHE_PREFER_L1:
|
| 1025 |
+
effectivePreference = SHAREDMEM_CARVEOUT_MAX_L1;
|
| 1026 |
+
break;
|
| 1027 |
+
case CACHE_PREFER_SHARED:
|
| 1028 |
+
effectivePreference = SHAREDMEM_CARVEOUT_MAX_SHARED;
|
| 1029 |
+
break;
|
| 1030 |
+
case CACHE_PREFER_EQUAL:
|
| 1031 |
+
effectivePreference = SHAREDMEM_CARVEOUT_HALF;
|
| 1032 |
+
break;
|
| 1033 |
+
default:
|
| 1034 |
+
effectivePreference = SHAREDMEM_CARVEOUT_DEFAULT;
|
| 1035 |
+
break;
|
| 1036 |
+
}
|
| 1037 |
+
}
|
| 1038 |
+
|
| 1039 |
+
if (effectivePreference == SHAREDMEM_CARVEOUT_DEFAULT) {
|
| 1040 |
+
preferenceShmemSize = properties->sharedMemPerMultiprocessor;
|
| 1041 |
+
}
|
| 1042 |
+
else {
|
| 1043 |
+
preferenceShmemSize = (size_t) (effectivePreference * properties->sharedMemPerMultiprocessor) / 100;
|
| 1044 |
+
}
|
| 1045 |
+
|
| 1046 |
+
status = cudaOccAlignUpShmemSizeVoltaPlus(&preferenceShmemSize, properties);
|
| 1047 |
+
*limit = preferenceShmemSize;
|
| 1048 |
+
return status;
|
| 1049 |
+
}
|
| 1050 |
+
|
| 1051 |
+
/**
|
| 1052 |
+
* Shared memory based on the cacheConfig
|
| 1053 |
+
*/
|
| 1054 |
+
static __OCC_INLINE cudaOccError cudaOccSMemPreference(size_t *limit, const cudaOccDeviceProp *properties, const cudaOccDeviceState *state)
|
| 1055 |
+
{
|
| 1056 |
+
size_t bytes = 0;
|
| 1057 |
+
size_t sharedMemPerMultiprocessorHigh = properties->sharedMemPerMultiprocessor;
|
| 1058 |
+
cudaOccCacheConfig cacheConfig = state->cacheConfig;
|
| 1059 |
+
|
| 1060 |
+
// Kepler has shared L1 cache / shared memory, and support cache
|
| 1061 |
+
// configuration to trade one for the other. These values are needed to
|
| 1062 |
+
// calculate the correct shared memory size for user requested cache
|
| 1063 |
+
// configuration.
|
| 1064 |
+
//
|
| 1065 |
+
size_t minCacheSize = 16384;
|
| 1066 |
+
size_t maxCacheSize = 49152;
|
| 1067 |
+
size_t cacheAndSharedTotal = sharedMemPerMultiprocessorHigh + minCacheSize;
|
| 1068 |
+
size_t sharedMemPerMultiprocessorLow = cacheAndSharedTotal - maxCacheSize;
|
| 1069 |
+
|
| 1070 |
+
switch (properties->computeMajor) {
|
| 1071 |
+
case 3:
|
| 1072 |
+
// Kepler supports 16KB, 32KB, or 48KB partitions for L1. The rest
|
| 1073 |
+
// is shared memory.
|
| 1074 |
+
//
|
| 1075 |
+
switch (cacheConfig) {
|
| 1076 |
+
default :
|
| 1077 |
+
case CACHE_PREFER_NONE:
|
| 1078 |
+
case CACHE_PREFER_SHARED:
|
| 1079 |
+
bytes = sharedMemPerMultiprocessorHigh;
|
| 1080 |
+
break;
|
| 1081 |
+
case CACHE_PREFER_L1:
|
| 1082 |
+
bytes = sharedMemPerMultiprocessorLow;
|
| 1083 |
+
break;
|
| 1084 |
+
case CACHE_PREFER_EQUAL:
|
| 1085 |
+
// Equal is the mid-point between high and low. It should be
|
| 1086 |
+
// equivalent to low + 16KB.
|
| 1087 |
+
//
|
| 1088 |
+
bytes = (sharedMemPerMultiprocessorHigh + sharedMemPerMultiprocessorLow) / 2;
|
| 1089 |
+
break;
|
| 1090 |
+
}
|
| 1091 |
+
break;
|
| 1092 |
+
case 5:
|
| 1093 |
+
case 6:
|
| 1094 |
+
// Maxwell and Pascal have dedicated shared memory.
|
| 1095 |
+
//
|
| 1096 |
+
bytes = sharedMemPerMultiprocessorHigh;
|
| 1097 |
+
break;
|
| 1098 |
+
default:
|
| 1099 |
+
return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
|
| 1100 |
+
}
|
| 1101 |
+
|
| 1102 |
+
*limit = bytes;
|
| 1103 |
+
|
| 1104 |
+
return CUDA_OCC_SUCCESS;
|
| 1105 |
+
}
|
| 1106 |
+
|
| 1107 |
+
/**
|
| 1108 |
+
* Shared memory based on config requested by User
|
| 1109 |
+
*/
|
| 1110 |
+
static __OCC_INLINE cudaOccError cudaOccSMemPerMultiprocessor(size_t *limit, const cudaOccDeviceProp *properties, const cudaOccDeviceState *state)
|
| 1111 |
+
{
|
| 1112 |
+
// Volta introduces a new API that allows for shared memory carveout preference. Because it is a shared memory preference,
|
| 1113 |
+
// it is handled separately from the cache config preference.
|
| 1114 |
+
if (properties->computeMajor >= 7) {
|
| 1115 |
+
return cudaOccSMemPreferenceVoltaPlus(limit, properties, state);
|
| 1116 |
+
}
|
| 1117 |
+
return cudaOccSMemPreference(limit, properties, state);
|
| 1118 |
+
}
|
| 1119 |
+
|
| 1120 |
+
/**
|
| 1121 |
+
* Return the per block shared memory limit based on function config
|
| 1122 |
+
*/
|
| 1123 |
+
static __OCC_INLINE cudaOccError cudaOccSMemPerBlock(size_t *limit, const cudaOccDeviceProp *properties, cudaOccFuncShmemConfig shmemLimitConfig, size_t smemPerCta)
|
| 1124 |
+
{
|
| 1125 |
+
switch (properties->computeMajor) {
|
| 1126 |
+
case 2:
|
| 1127 |
+
case 3:
|
| 1128 |
+
case 4:
|
| 1129 |
+
case 5:
|
| 1130 |
+
case 6:
|
| 1131 |
+
*limit = properties->sharedMemPerBlock;
|
| 1132 |
+
break;
|
| 1133 |
+
case 7:
|
| 1134 |
+
case 8:
|
| 1135 |
+
case 9:
|
| 1136 |
+
case 10:
|
| 1137 |
+
case 12:
|
| 1138 |
+
switch (shmemLimitConfig) {
|
| 1139 |
+
default:
|
| 1140 |
+
case FUNC_SHMEM_LIMIT_DEFAULT:
|
| 1141 |
+
*limit = properties->sharedMemPerBlock;
|
| 1142 |
+
break;
|
| 1143 |
+
case FUNC_SHMEM_LIMIT_OPTIN:
|
| 1144 |
+
if (smemPerCta > properties->sharedMemPerBlock) {
|
| 1145 |
+
*limit = properties->sharedMemPerBlockOptin;
|
| 1146 |
+
}
|
| 1147 |
+
else {
|
| 1148 |
+
*limit = properties->sharedMemPerBlock;
|
| 1149 |
+
}
|
| 1150 |
+
break;
|
| 1151 |
+
}
|
| 1152 |
+
break;
|
| 1153 |
+
default:
|
| 1154 |
+
return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
|
| 1155 |
+
}
|
| 1156 |
+
|
| 1157 |
+
// Starting Ampere, CUDA driver reserves additional shared memory per block
|
| 1158 |
+
if (properties->computeMajor >= 8) {
|
| 1159 |
+
*limit += properties->reservedSharedMemPerBlock;
|
| 1160 |
+
}
|
| 1161 |
+
|
| 1162 |
+
return CUDA_OCC_SUCCESS;
|
| 1163 |
+
}
|
| 1164 |
+
|
| 1165 |
+
/**
|
| 1166 |
+
* Partitioned global caching mode support
|
| 1167 |
+
*/
|
| 1168 |
+
static __OCC_INLINE cudaOccError cudaOccPartitionedGlobalCachingModeSupport(cudaOccPartitionedGCSupport *limit, const cudaOccDeviceProp *properties)
|
| 1169 |
+
{
|
| 1170 |
+
*limit = PARTITIONED_GC_NOT_SUPPORTED;
|
| 1171 |
+
|
| 1172 |
+
if ((properties->computeMajor == 5 && (properties->computeMinor == 2 || properties->computeMinor == 3)) ||
|
| 1173 |
+
properties->computeMajor == 6) {
|
| 1174 |
+
*limit = PARTITIONED_GC_SUPPORTED;
|
| 1175 |
+
}
|
| 1176 |
+
|
| 1177 |
+
if (properties->computeMajor == 6 && properties->computeMinor == 0) {
|
| 1178 |
+
*limit = PARTITIONED_GC_NOT_SUPPORTED;
|
| 1179 |
+
}
|
| 1180 |
+
|
| 1181 |
+
return CUDA_OCC_SUCCESS;
|
| 1182 |
+
}
|
| 1183 |
+
|
| 1184 |
+
///////////////////////////////////////////////
|
| 1185 |
+
// User Input Sanity //
|
| 1186 |
+
///////////////////////////////////////////////
|
| 1187 |
+
|
| 1188 |
+
/**
 * Verify device properties
 *
 * Returns CUDA_OCC_ERROR_INVALID_INPUT unless every limit checked below is
 * a positive number.
 */
static __OCC_INLINE cudaOccError cudaOccDevicePropCheck(const cudaOccDeviceProp *properties)
{
    // Compute capability is checked during the occupancy calculation,
    // so only the resource limits are validated here.
    //
    if (properties->maxThreadsPerBlock          > 0 &&
        properties->maxThreadsPerMultiprocessor > 0 &&
        properties->regsPerBlock                > 0 &&
        properties->regsPerMultiprocessor       > 0 &&
        properties->warpSize                    > 0 &&
        properties->sharedMemPerBlock           > 0 &&
        properties->sharedMemPerMultiprocessor  > 0 &&
        properties->numSms                      > 0) {
        return CUDA_OCC_SUCCESS;
    }

    return CUDA_OCC_ERROR_INVALID_INPUT;
}
|
| 1209 |
+
|
| 1210 |
+
/**
 * Verify function attributes
 *
 * The thread limit must be positive and the register count non-negative.
 */
static __OCC_INLINE cudaOccError cudaOccFuncAttributesCheck(const cudaOccFuncAttributes *attributes)
{
    // A register count of zero is accepted: the compiler may choose not to
    // use any register (empty kernels, etc.)
    //
    if (attributes->maxThreadsPerBlock > 0 && attributes->numRegs >= 0) {
        return CUDA_OCC_SUCCESS;
    }

    return CUDA_OCC_ERROR_INVALID_INPUT;
}
|
| 1223 |
+
|
| 1224 |
+
/**
 * Verify device state
 *
 * Placeholder: nothing is validated yet, so every state is accepted.
 */
static __OCC_INLINE cudaOccError cudaOccDeviceStateCheck(const cudaOccDeviceState *state)
{
    // The parameter is intentionally unused; the cast silences the
    // unused-variable warning
    (void)state;

    return CUDA_OCC_SUCCESS;
}
|
| 1232 |
+
|
| 1233 |
+
/**
 * Run all user input checks
 *
 * Validates device properties, then function attributes, then device state,
 * and returns the status of the first check that fails (or
 * CUDA_OCC_SUCCESS when all pass). Later checks are skipped after a failure.
 */
static __OCC_INLINE cudaOccError cudaOccInputCheck(
    const cudaOccDeviceProp     *properties,
    const cudaOccFuncAttributes *attributes,
    const cudaOccDeviceState    *state)
{
    cudaOccError checkResult = cudaOccDevicePropCheck(properties);

    if (checkResult == CUDA_OCC_SUCCESS) {
        checkResult = cudaOccFuncAttributesCheck(attributes);
    }

    if (checkResult == CUDA_OCC_SUCCESS) {
        checkResult = cudaOccDeviceStateCheck(state);
    }

    return checkResult;
}
|
| 1257 |
+
|
| 1258 |
+
///////////////////////////////////////////////
|
| 1259 |
+
// Occupancy calculation Functions //
|
| 1260 |
+
///////////////////////////////////////////////
|
| 1261 |
+
|
| 1262 |
+
/**
 * Partitioned global caching mode to assume for the calculation
 *
 * Returns the mode requested by the function attributes, or
 * PARTITIONED_GC_OFF when the device does not support the feature.
 */
static __OCC_INLINE cudaOccPartitionedGCConfig cudaOccPartitionedGCExpected(
    const cudaOccDeviceProp     *properties,
    const cudaOccFuncAttributes *attributes)
{
    cudaOccPartitionedGCSupport deviceSupport;

    cudaOccPartitionedGlobalCachingModeSupport(&deviceSupport, properties);

    if (deviceSupport == PARTITIONED_GC_NOT_SUPPORTED) {
        return PARTITIONED_GC_OFF;
    }

    return attributes->partitionedGCConfig;
}
|
| 1279 |
+
|
| 1280 |
+
// Warp limit
|
| 1281 |
+
//
|
| 1282 |
+
static __OCC_INLINE cudaOccError cudaOccMaxBlocksPerSMWarpsLimit(
|
| 1283 |
+
int *limit,
|
| 1284 |
+
cudaOccPartitionedGCConfig gcConfig,
|
| 1285 |
+
const cudaOccDeviceProp *properties,
|
| 1286 |
+
const cudaOccFuncAttributes *attributes,
|
| 1287 |
+
int blockSize)
|
| 1288 |
+
{
|
| 1289 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1290 |
+
int maxWarpsPerSm;
|
| 1291 |
+
int warpsAllocatedPerCTA;
|
| 1292 |
+
int maxBlocks;
|
| 1293 |
+
(void)attributes; // silence unused-variable warning
|
| 1294 |
+
|
| 1295 |
+
if (blockSize > properties->maxThreadsPerBlock) {
|
| 1296 |
+
maxBlocks = 0;
|
| 1297 |
+
}
|
| 1298 |
+
else {
|
| 1299 |
+
maxWarpsPerSm = properties->maxThreadsPerMultiprocessor / properties->warpSize;
|
| 1300 |
+
warpsAllocatedPerCTA = __occDivideRoundUp(blockSize, properties->warpSize);
|
| 1301 |
+
maxBlocks = 0;
|
| 1302 |
+
|
| 1303 |
+
if (gcConfig != PARTITIONED_GC_OFF) {
|
| 1304 |
+
int maxBlocksPerSmPartition;
|
| 1305 |
+
int maxWarpsPerSmPartition;
|
| 1306 |
+
|
| 1307 |
+
// If partitioned global caching is on, then a CTA can only use a SM
|
| 1308 |
+
// partition (a half SM), and thus a half of the warp slots
|
| 1309 |
+
// available per SM
|
| 1310 |
+
//
|
| 1311 |
+
maxWarpsPerSmPartition = maxWarpsPerSm / 2;
|
| 1312 |
+
maxBlocksPerSmPartition = maxWarpsPerSmPartition / warpsAllocatedPerCTA;
|
| 1313 |
+
maxBlocks = maxBlocksPerSmPartition * 2;
|
| 1314 |
+
}
|
| 1315 |
+
// On hardware that supports partitioned global caching, each half SM is
|
| 1316 |
+
// guaranteed to support at least 32 warps (maximum number of warps of a
|
| 1317 |
+
// CTA), so caching will not cause 0 occupancy due to insufficient warp
|
| 1318 |
+
// allocation slots.
|
| 1319 |
+
//
|
| 1320 |
+
else {
|
| 1321 |
+
maxBlocks = maxWarpsPerSm / warpsAllocatedPerCTA;
|
| 1322 |
+
}
|
| 1323 |
+
}
|
| 1324 |
+
|
| 1325 |
+
*limit = maxBlocks;
|
| 1326 |
+
|
| 1327 |
+
return status;
|
| 1328 |
+
}
|
| 1329 |
+
|
| 1330 |
+
// Shared memory limit
|
| 1331 |
+
//
|
| 1332 |
+
static __OCC_INLINE cudaOccError cudaOccMaxBlocksPerSMSmemLimit(
|
| 1333 |
+
int *limit,
|
| 1334 |
+
cudaOccResult *result,
|
| 1335 |
+
const cudaOccDeviceProp *properties,
|
| 1336 |
+
const cudaOccFuncAttributes *attributes,
|
| 1337 |
+
const cudaOccDeviceState *state,
|
| 1338 |
+
int blockSize,
|
| 1339 |
+
size_t dynamicSmemSize)
|
| 1340 |
+
{
|
| 1341 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1342 |
+
int allocationGranularity;
|
| 1343 |
+
size_t userSmemPreference = 0;
|
| 1344 |
+
size_t totalSmemUsagePerCTA;
|
| 1345 |
+
size_t maxSmemUsagePerCTA;
|
| 1346 |
+
size_t smemAllocatedPerCTA;
|
| 1347 |
+
size_t staticSmemSize;
|
| 1348 |
+
size_t sharedMemPerMultiprocessor;
|
| 1349 |
+
size_t smemLimitPerCTA;
|
| 1350 |
+
int maxBlocks;
|
| 1351 |
+
int dynamicSmemSizeExceeded = 0;
|
| 1352 |
+
int totalSmemSizeExceeded = 0;
|
| 1353 |
+
(void)blockSize; // silence unused-variable warning
|
| 1354 |
+
|
| 1355 |
+
status = cudaOccSMemAllocationGranularity(&allocationGranularity, properties);
|
| 1356 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1357 |
+
return status;
|
| 1358 |
+
}
|
| 1359 |
+
|
| 1360 |
+
// Obtain the user preferred shared memory size. This setting is ignored if
|
| 1361 |
+
// user requests more shared memory than preferred.
|
| 1362 |
+
//
|
| 1363 |
+
status = cudaOccSMemPerMultiprocessor(&userSmemPreference, properties, state);
|
| 1364 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1365 |
+
return status;
|
| 1366 |
+
}
|
| 1367 |
+
|
| 1368 |
+
staticSmemSize = attributes->sharedSizeBytes + properties->reservedSharedMemPerBlock;
|
| 1369 |
+
totalSmemUsagePerCTA = staticSmemSize + dynamicSmemSize;
|
| 1370 |
+
smemAllocatedPerCTA = __occRoundUp((int)totalSmemUsagePerCTA, (int)allocationGranularity);
|
| 1371 |
+
|
| 1372 |
+
maxSmemUsagePerCTA = staticSmemSize + attributes->maxDynamicSharedSizeBytes;
|
| 1373 |
+
|
| 1374 |
+
dynamicSmemSizeExceeded = 0;
|
| 1375 |
+
totalSmemSizeExceeded = 0;
|
| 1376 |
+
|
| 1377 |
+
// Obtain the user set maximum dynamic size if it exists
|
| 1378 |
+
// If so, the current launch dynamic shared memory must not
|
| 1379 |
+
// exceed the set limit
|
| 1380 |
+
if (attributes->shmemLimitConfig != FUNC_SHMEM_LIMIT_DEFAULT &&
|
| 1381 |
+
dynamicSmemSize > attributes->maxDynamicSharedSizeBytes) {
|
| 1382 |
+
dynamicSmemSizeExceeded = 1;
|
| 1383 |
+
}
|
| 1384 |
+
|
| 1385 |
+
status = cudaOccSMemPerBlock(&smemLimitPerCTA, properties, attributes->shmemLimitConfig, maxSmemUsagePerCTA);
|
| 1386 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1387 |
+
return status;
|
| 1388 |
+
}
|
| 1389 |
+
|
| 1390 |
+
if (smemAllocatedPerCTA > smemLimitPerCTA) {
|
| 1391 |
+
totalSmemSizeExceeded = 1;
|
| 1392 |
+
}
|
| 1393 |
+
|
| 1394 |
+
if (dynamicSmemSizeExceeded || totalSmemSizeExceeded) {
|
| 1395 |
+
maxBlocks = 0;
|
| 1396 |
+
}
|
| 1397 |
+
else {
|
| 1398 |
+
// User requested shared memory limit is used as long as it is greater
|
| 1399 |
+
// than the total shared memory used per CTA, i.e. as long as at least
|
| 1400 |
+
// one CTA can be launched.
|
| 1401 |
+
if (userSmemPreference >= smemAllocatedPerCTA) {
|
| 1402 |
+
sharedMemPerMultiprocessor = userSmemPreference;
|
| 1403 |
+
}
|
| 1404 |
+
else {
|
| 1405 |
+
// On Volta+, user requested shared memory will limit occupancy
|
| 1406 |
+
// if it's less than shared memory per CTA. Otherwise, the
|
| 1407 |
+
// maximum shared memory limit is used.
|
| 1408 |
+
if (properties->computeMajor >= 7) {
|
| 1409 |
+
sharedMemPerMultiprocessor = smemAllocatedPerCTA;
|
| 1410 |
+
status = cudaOccAlignUpShmemSizeVoltaPlus(&sharedMemPerMultiprocessor, properties);
|
| 1411 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1412 |
+
return status;
|
| 1413 |
+
}
|
| 1414 |
+
}
|
| 1415 |
+
else {
|
| 1416 |
+
sharedMemPerMultiprocessor = properties->sharedMemPerMultiprocessor;
|
| 1417 |
+
}
|
| 1418 |
+
}
|
| 1419 |
+
|
| 1420 |
+
if (smemAllocatedPerCTA > 0) {
|
| 1421 |
+
maxBlocks = (int)(sharedMemPerMultiprocessor / smemAllocatedPerCTA);
|
| 1422 |
+
}
|
| 1423 |
+
else {
|
| 1424 |
+
maxBlocks = INT_MAX;
|
| 1425 |
+
}
|
| 1426 |
+
}
|
| 1427 |
+
|
| 1428 |
+
result->allocatedSharedMemPerBlock = smemAllocatedPerCTA;
|
| 1429 |
+
|
| 1430 |
+
*limit = maxBlocks;
|
| 1431 |
+
|
| 1432 |
+
return status;
|
| 1433 |
+
}
|
| 1434 |
+
|
| 1435 |
+
static __OCC_INLINE
|
| 1436 |
+
cudaOccError cudaOccMaxBlocksPerSMRegsLimit(
|
| 1437 |
+
int *limit,
|
| 1438 |
+
cudaOccPartitionedGCConfig *gcConfig,
|
| 1439 |
+
cudaOccResult *result,
|
| 1440 |
+
const cudaOccDeviceProp *properties,
|
| 1441 |
+
const cudaOccFuncAttributes *attributes,
|
| 1442 |
+
int blockSize)
|
| 1443 |
+
{
|
| 1444 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1445 |
+
int allocationGranularity;
|
| 1446 |
+
int warpsAllocatedPerCTA;
|
| 1447 |
+
int regsAllocatedPerCTA;
|
| 1448 |
+
int regsAssumedPerCTA;
|
| 1449 |
+
int regsPerWarp;
|
| 1450 |
+
int regsAllocatedPerWarp;
|
| 1451 |
+
int numSubPartitions;
|
| 1452 |
+
int numRegsPerSubPartition;
|
| 1453 |
+
int numWarpsPerSubPartition;
|
| 1454 |
+
int numWarpsPerSM;
|
| 1455 |
+
int maxBlocks;
|
| 1456 |
+
int maxRegsPerThread;
|
| 1457 |
+
|
| 1458 |
+
status = cudaOccRegAllocationGranularity(
|
| 1459 |
+
&allocationGranularity,
|
| 1460 |
+
properties);
|
| 1461 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1462 |
+
return status;
|
| 1463 |
+
}
|
| 1464 |
+
|
| 1465 |
+
status = cudaOccRegAllocationMaxPerThread(
|
| 1466 |
+
&maxRegsPerThread,
|
| 1467 |
+
properties);
|
| 1468 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1469 |
+
return status;
|
| 1470 |
+
}
|
| 1471 |
+
|
| 1472 |
+
status = cudaOccSubPartitionsPerMultiprocessor(&numSubPartitions, properties);
|
| 1473 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1474 |
+
return status;
|
| 1475 |
+
}
|
| 1476 |
+
|
| 1477 |
+
warpsAllocatedPerCTA = __occDivideRoundUp(blockSize, properties->warpSize);
|
| 1478 |
+
|
| 1479 |
+
// GPUs of compute capability 2.x and higher allocate registers to warps
|
| 1480 |
+
//
|
| 1481 |
+
// Number of regs per warp is regs per thread x warp size, rounded up to
|
| 1482 |
+
// register allocation granularity
|
| 1483 |
+
//
|
| 1484 |
+
regsPerWarp = attributes->numRegs * properties->warpSize;
|
| 1485 |
+
regsAllocatedPerWarp = __occRoundUp(regsPerWarp, allocationGranularity);
|
| 1486 |
+
regsAllocatedPerCTA = regsAllocatedPerWarp * warpsAllocatedPerCTA;
|
| 1487 |
+
|
| 1488 |
+
// Hardware verifies if a launch fits the per-CTA register limit. For
|
| 1489 |
+
// historical reasons, the verification logic assumes register
|
| 1490 |
+
// allocations are made to all partitions simultaneously. Therefore, to
|
| 1491 |
+
// simulate the hardware check, the warp allocation needs to be rounded
|
| 1492 |
+
// up to the number of partitions.
|
| 1493 |
+
//
|
| 1494 |
+
regsAssumedPerCTA = regsAllocatedPerWarp * __occRoundUp(warpsAllocatedPerCTA, numSubPartitions);
|
| 1495 |
+
|
| 1496 |
+
if (properties->regsPerBlock < regsAssumedPerCTA || // Hardware check
|
| 1497 |
+
properties->regsPerBlock < regsAllocatedPerCTA || // Software check
|
| 1498 |
+
attributes->numRegs > maxRegsPerThread) { // Per thread limit check
|
| 1499 |
+
maxBlocks = 0;
|
| 1500 |
+
}
|
| 1501 |
+
else {
|
| 1502 |
+
if (regsAllocatedPerWarp > 0) {
|
| 1503 |
+
// Registers are allocated in each sub-partition. The max number
|
| 1504 |
+
// of warps that can fit on an SM is equal to the max number of
|
| 1505 |
+
// warps per sub-partition x number of sub-partitions.
|
| 1506 |
+
//
|
| 1507 |
+
numRegsPerSubPartition = properties->regsPerMultiprocessor / numSubPartitions;
|
| 1508 |
+
numWarpsPerSubPartition = numRegsPerSubPartition / regsAllocatedPerWarp;
|
| 1509 |
+
|
| 1510 |
+
maxBlocks = 0;
|
| 1511 |
+
|
| 1512 |
+
if (*gcConfig != PARTITIONED_GC_OFF) {
|
| 1513 |
+
int numSubPartitionsPerSmPartition;
|
| 1514 |
+
int numWarpsPerSmPartition;
|
| 1515 |
+
int maxBlocksPerSmPartition;
|
| 1516 |
+
|
| 1517 |
+
// If partitioned global caching is on, then a CTA can only
|
| 1518 |
+
// use a half SM, and thus a half of the registers available
|
| 1519 |
+
// per SM
|
| 1520 |
+
//
|
| 1521 |
+
numSubPartitionsPerSmPartition = numSubPartitions / 2;
|
| 1522 |
+
numWarpsPerSmPartition = numWarpsPerSubPartition * numSubPartitionsPerSmPartition;
|
| 1523 |
+
maxBlocksPerSmPartition = numWarpsPerSmPartition / warpsAllocatedPerCTA;
|
| 1524 |
+
maxBlocks = maxBlocksPerSmPartition * 2;
|
| 1525 |
+
}
|
| 1526 |
+
|
| 1527 |
+
// Try again if partitioned global caching is not enabled, or if
|
| 1528 |
+
// the CTA cannot fit on the SM with caching on (maxBlocks == 0). In the latter
|
| 1529 |
+
// case, the device will automatically turn off caching, except
|
| 1530 |
+
// if the user forces enablement via PARTITIONED_GC_ON_STRICT to calculate
|
| 1531 |
+
// occupancy and launch configuration.
|
| 1532 |
+
//
|
| 1533 |
+
if (maxBlocks == 0 && *gcConfig != PARTITIONED_GC_ON_STRICT) {
|
| 1534 |
+
// In case *gcConfig was PARTITIONED_GC_ON flip it OFF since
|
| 1535 |
+
// this is what it will be if we spread CTA across partitions.
|
| 1536 |
+
//
|
| 1537 |
+
*gcConfig = PARTITIONED_GC_OFF;
|
| 1538 |
+
numWarpsPerSM = numWarpsPerSubPartition * numSubPartitions;
|
| 1539 |
+
maxBlocks = numWarpsPerSM / warpsAllocatedPerCTA;
|
| 1540 |
+
}
|
| 1541 |
+
}
|
| 1542 |
+
else {
|
| 1543 |
+
maxBlocks = INT_MAX;
|
| 1544 |
+
}
|
| 1545 |
+
}
|
| 1546 |
+
|
| 1547 |
+
|
| 1548 |
+
result->allocatedRegistersPerBlock = regsAllocatedPerCTA;
|
| 1549 |
+
|
| 1550 |
+
*limit = maxBlocks;
|
| 1551 |
+
|
| 1552 |
+
return status;
|
| 1553 |
+
}
|
| 1554 |
+
|
| 1555 |
+
// Barrier limit
|
| 1556 |
+
//
|
| 1557 |
+
static __OCC_INLINE cudaOccError cudaOccMaxBlocksPerSMBlockBarrierLimit(
|
| 1558 |
+
int *limit,
|
| 1559 |
+
int ctaLimitBlocks,
|
| 1560 |
+
const cudaOccDeviceProp *properties,
|
| 1561 |
+
const cudaOccFuncAttributes *attributes)
|
| 1562 |
+
{
|
| 1563 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1564 |
+
int numBarriersAvailable = 0;
|
| 1565 |
+
int numBarriersUsed = attributes->numBlockBarriers;
|
| 1566 |
+
int maxBlocks = INT_MAX;
|
| 1567 |
+
|
| 1568 |
+
switch(properties->computeMajor) {
|
| 1569 |
+
case 5:
|
| 1570 |
+
case 6:
|
| 1571 |
+
case 7:
|
| 1572 |
+
numBarriersAvailable = ctaLimitBlocks * 2;
|
| 1573 |
+
break;
|
| 1574 |
+
case 8:
|
| 1575 |
+
if (properties->computeMinor == 0) {
|
| 1576 |
+
numBarriersAvailable = ctaLimitBlocks * 2;
|
| 1577 |
+
}
|
| 1578 |
+
else {
|
| 1579 |
+
numBarriersAvailable = ctaLimitBlocks;
|
| 1580 |
+
}
|
| 1581 |
+
break;
|
| 1582 |
+
case 9:
|
| 1583 |
+
numBarriersAvailable = ctaLimitBlocks * 2;
|
| 1584 |
+
break;
|
| 1585 |
+
case 10:
|
| 1586 |
+
switch(properties->computeMinor) {
|
| 1587 |
+
case 1 :
|
| 1588 |
+
numBarriersAvailable = ctaLimitBlocks;
|
| 1589 |
+
break;
|
| 1590 |
+
case 0 : /* explicitly added to avoid build failure in WDDM driver components. */
|
| 1591 |
+
default :
|
| 1592 |
+
numBarriersAvailable = ctaLimitBlocks * 2;
|
| 1593 |
+
}
|
| 1594 |
+
|
| 1595 |
+
break;
|
| 1596 |
+
case 12:
|
| 1597 |
+
numBarriersAvailable = ctaLimitBlocks;
|
| 1598 |
+
break;
|
| 1599 |
+
default:
|
| 1600 |
+
return CUDA_OCC_ERROR_UNKNOWN_DEVICE;
|
| 1601 |
+
}
|
| 1602 |
+
|
| 1603 |
+
if (numBarriersUsed) {
|
| 1604 |
+
maxBlocks = numBarriersAvailable / numBarriersUsed;
|
| 1605 |
+
}
|
| 1606 |
+
|
| 1607 |
+
*limit = maxBlocks;
|
| 1608 |
+
|
| 1609 |
+
return status;
|
| 1610 |
+
}
|
| 1611 |
+
|
| 1612 |
+
///////////////////////////////////
|
| 1613 |
+
// API Implementations //
|
| 1614 |
+
///////////////////////////////////
|
| 1615 |
+
|
| 1616 |
+
static __OCC_INLINE
|
| 1617 |
+
cudaOccError cudaOccMaxActiveBlocksPerMultiprocessor(
|
| 1618 |
+
cudaOccResult *result,
|
| 1619 |
+
const cudaOccDeviceProp *properties,
|
| 1620 |
+
const cudaOccFuncAttributes *attributes,
|
| 1621 |
+
const cudaOccDeviceState *state,
|
| 1622 |
+
int blockSize,
|
| 1623 |
+
size_t dynamicSmemSize)
|
| 1624 |
+
{
|
| 1625 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1626 |
+
int ctaLimitWarps = 0;
|
| 1627 |
+
int ctaLimitBlocks = 0;
|
| 1628 |
+
int ctaLimitSMem = 0;
|
| 1629 |
+
int ctaLimitRegs = 0;
|
| 1630 |
+
int ctaLimitBars = 0;
|
| 1631 |
+
int ctaLimit = 0;
|
| 1632 |
+
unsigned int limitingFactors = 0;
|
| 1633 |
+
|
| 1634 |
+
cudaOccPartitionedGCConfig gcConfig = PARTITIONED_GC_OFF;
|
| 1635 |
+
|
| 1636 |
+
if (!result || !properties || !attributes || !state || blockSize <= 0) {
|
| 1637 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 1638 |
+
}
|
| 1639 |
+
|
| 1640 |
+
///////////////////////////
|
| 1641 |
+
// Check user input
|
| 1642 |
+
///////////////////////////
|
| 1643 |
+
|
| 1644 |
+
status = cudaOccInputCheck(properties, attributes, state);
|
| 1645 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1646 |
+
return status;
|
| 1647 |
+
}
|
| 1648 |
+
|
| 1649 |
+
///////////////////////////
|
| 1650 |
+
// Initialization
|
| 1651 |
+
///////////////////////////
|
| 1652 |
+
|
| 1653 |
+
gcConfig = cudaOccPartitionedGCExpected(properties, attributes);
|
| 1654 |
+
|
| 1655 |
+
///////////////////////////
|
| 1656 |
+
// Compute occupancy
|
| 1657 |
+
///////////////////////////
|
| 1658 |
+
|
| 1659 |
+
// Limits due to registers/SM
|
| 1660 |
+
// Also compute if partitioned global caching has to be turned off
|
| 1661 |
+
//
|
| 1662 |
+
status = cudaOccMaxBlocksPerSMRegsLimit(&ctaLimitRegs, &gcConfig, result, properties, attributes, blockSize);
|
| 1663 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1664 |
+
return status;
|
| 1665 |
+
}
|
| 1666 |
+
|
| 1667 |
+
// SMs on GP100 (6.0) have 2 subpartitions, while those on GP10x have 4.
|
| 1668 |
+
// As a result, an SM on GP100 may be able to run more CTAs than the one on GP10x.
|
| 1669 |
+
// For forward compatibility within Pascal family, if a function cannot run on GP10x (maxBlock == 0),
|
| 1670 |
+
// we do not let it run on any Pascal processor, even though it may be able to run on GP100.
|
| 1671 |
+
// Therefore, we check the occupancy on GP10x when it can run on GP100
|
| 1672 |
+
//
|
| 1673 |
+
if (properties->computeMajor == 6 && properties->computeMinor == 0 && ctaLimitRegs) {
|
| 1674 |
+
cudaOccDeviceProp propertiesGP10x;
|
| 1675 |
+
cudaOccPartitionedGCConfig gcConfigGP10x = gcConfig;
|
| 1676 |
+
int ctaLimitRegsGP10x = 0;
|
| 1677 |
+
|
| 1678 |
+
// Set up properties for GP10x
|
| 1679 |
+
memcpy(&propertiesGP10x, properties, sizeof(propertiesGP10x));
|
| 1680 |
+
propertiesGP10x.computeMinor = 1;
|
| 1681 |
+
|
| 1682 |
+
status = cudaOccMaxBlocksPerSMRegsLimit(&ctaLimitRegsGP10x, &gcConfigGP10x, result, &propertiesGP10x, attributes, blockSize);
|
| 1683 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1684 |
+
return status;
|
| 1685 |
+
}
|
| 1686 |
+
|
| 1687 |
+
if (ctaLimitRegsGP10x == 0) {
|
| 1688 |
+
ctaLimitRegs = 0;
|
| 1689 |
+
}
|
| 1690 |
+
}
|
| 1691 |
+
|
| 1692 |
+
// Limits due to warps/SM
|
| 1693 |
+
//
|
| 1694 |
+
status = cudaOccMaxBlocksPerSMWarpsLimit(&ctaLimitWarps, gcConfig, properties, attributes, blockSize);
|
| 1695 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1696 |
+
return status;
|
| 1697 |
+
}
|
| 1698 |
+
|
| 1699 |
+
// Limits due to blocks/SM
|
| 1700 |
+
//
|
| 1701 |
+
status = cudaOccMaxBlocksPerMultiprocessor(&ctaLimitBlocks, properties);
|
| 1702 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1703 |
+
return status;
|
| 1704 |
+
}
|
| 1705 |
+
|
| 1706 |
+
// Limits due to shared memory/SM
|
| 1707 |
+
//
|
| 1708 |
+
status = cudaOccMaxBlocksPerSMSmemLimit(&ctaLimitSMem, result, properties, attributes, state, blockSize, dynamicSmemSize);
|
| 1709 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1710 |
+
return status;
|
| 1711 |
+
}
|
| 1712 |
+
|
| 1713 |
+
///////////////////////////
|
| 1714 |
+
// Overall occupancy
|
| 1715 |
+
///////////////////////////
|
| 1716 |
+
|
| 1717 |
+
// Overall limit is min() of limits due to above reasons
|
| 1718 |
+
//
|
| 1719 |
+
ctaLimit = __occMin(ctaLimitRegs, __occMin(ctaLimitSMem, __occMin(ctaLimitWarps, ctaLimitBlocks)));
|
| 1720 |
+
|
| 1721 |
+
// Determine occupancy limiting factors
|
| 1722 |
+
//
|
| 1723 |
+
if (ctaLimit == ctaLimitWarps) {
|
| 1724 |
+
limitingFactors |= OCC_LIMIT_WARPS;
|
| 1725 |
+
}
|
| 1726 |
+
if (ctaLimit == ctaLimitRegs) {
|
| 1727 |
+
limitingFactors |= OCC_LIMIT_REGISTERS;
|
| 1728 |
+
}
|
| 1729 |
+
if (ctaLimit == ctaLimitSMem) {
|
| 1730 |
+
limitingFactors |= OCC_LIMIT_SHARED_MEMORY;
|
| 1731 |
+
}
|
| 1732 |
+
if (ctaLimit == ctaLimitBlocks) {
|
| 1733 |
+
limitingFactors |= OCC_LIMIT_BLOCKS;
|
| 1734 |
+
}
|
| 1735 |
+
|
| 1736 |
+
// For Hopper onwards compute the limits to occupancy based on block barrier count
|
| 1737 |
+
//
|
| 1738 |
+
if (properties->computeMajor >= 9 && attributes->numBlockBarriers > 0) {
|
| 1739 |
+
// Limits due to barrier/SM
|
| 1740 |
+
//
|
| 1741 |
+
status = cudaOccMaxBlocksPerSMBlockBarrierLimit(&ctaLimitBars, ctaLimitBlocks, properties, attributes);
|
| 1742 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1743 |
+
return status;
|
| 1744 |
+
}
|
| 1745 |
+
|
| 1746 |
+
// Recompute overall limit based on barrier/SM
|
| 1747 |
+
//
|
| 1748 |
+
ctaLimit = __occMin(ctaLimitBars, ctaLimit);
|
| 1749 |
+
|
| 1750 |
+
// Determine if this is occupancy limiting factor
|
| 1751 |
+
//
|
| 1752 |
+
if (ctaLimit == ctaLimitBars) {
|
| 1753 |
+
limitingFactors |= OCC_LIMIT_BARRIERS;
|
| 1754 |
+
}
|
| 1755 |
+
}
|
| 1756 |
+
else {
|
| 1757 |
+
ctaLimitBars = INT_MAX;
|
| 1758 |
+
}
|
| 1759 |
+
|
| 1760 |
+
// Fill in the return values
|
| 1761 |
+
//
|
| 1762 |
+
result->limitingFactors = limitingFactors;
|
| 1763 |
+
|
| 1764 |
+
result->blockLimitRegs = ctaLimitRegs;
|
| 1765 |
+
result->blockLimitSharedMem = ctaLimitSMem;
|
| 1766 |
+
result->blockLimitWarps = ctaLimitWarps;
|
| 1767 |
+
result->blockLimitBlocks = ctaLimitBlocks;
|
| 1768 |
+
result->blockLimitBarriers = ctaLimitBars;
|
| 1769 |
+
result->partitionedGCConfig = gcConfig;
|
| 1770 |
+
|
| 1771 |
+
// Final occupancy
|
| 1772 |
+
result->activeBlocksPerMultiprocessor = ctaLimit;
|
| 1773 |
+
|
| 1774 |
+
return CUDA_OCC_SUCCESS;
|
| 1775 |
+
}
|
| 1776 |
+
|
| 1777 |
+
static __OCC_INLINE
|
| 1778 |
+
cudaOccError cudaOccAvailableDynamicSMemPerBlock(
|
| 1779 |
+
size_t *bytesAvailable,
|
| 1780 |
+
const cudaOccDeviceProp *properties,
|
| 1781 |
+
const cudaOccFuncAttributes *attributes,
|
| 1782 |
+
const cudaOccDeviceState *state,
|
| 1783 |
+
int numBlocks,
|
| 1784 |
+
int blockSize)
|
| 1785 |
+
{
|
| 1786 |
+
int allocationGranularity;
|
| 1787 |
+
size_t smemLimitPerBlock;
|
| 1788 |
+
size_t smemAvailableForDynamic;
|
| 1789 |
+
size_t userSmemPreference = 0;
|
| 1790 |
+
size_t sharedMemPerMultiprocessor;
|
| 1791 |
+
cudaOccResult result;
|
| 1792 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1793 |
+
|
| 1794 |
+
if (numBlocks <= 0)
|
| 1795 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 1796 |
+
|
| 1797 |
+
// First compute occupancy of potential kernel launch.
|
| 1798 |
+
//
|
| 1799 |
+
status = cudaOccMaxActiveBlocksPerMultiprocessor(&result, properties, attributes, state, blockSize, 0);
|
| 1800 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1801 |
+
return status;
|
| 1802 |
+
}
|
| 1803 |
+
// Check if occupancy is achievable given user requested number of blocks.
|
| 1804 |
+
//
|
| 1805 |
+
if (result.activeBlocksPerMultiprocessor < numBlocks) {
|
| 1806 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 1807 |
+
}
|
| 1808 |
+
|
| 1809 |
+
status = cudaOccSMemAllocationGranularity(&allocationGranularity, properties);
|
| 1810 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1811 |
+
return status;
|
| 1812 |
+
}
|
| 1813 |
+
|
| 1814 |
+
// Return the per block shared memory limit based on function config.
|
| 1815 |
+
//
|
| 1816 |
+
status = cudaOccSMemPerBlock(&smemLimitPerBlock, properties, attributes->shmemLimitConfig, properties->sharedMemPerMultiprocessor);
|
| 1817 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1818 |
+
return status;
|
| 1819 |
+
}
|
| 1820 |
+
|
| 1821 |
+
// If there is only a single block needed per SM, then the user preference can be ignored and the fully SW
|
| 1822 |
+
// limit is allowed to be used as shared memory otherwise if more than one block is needed, then the user
|
| 1823 |
+
// preference sets the total limit of available shared memory.
|
| 1824 |
+
//
|
| 1825 |
+
cudaOccSMemPerMultiprocessor(&userSmemPreference, properties, state);
|
| 1826 |
+
if (numBlocks == 1) {
|
| 1827 |
+
sharedMemPerMultiprocessor = smemLimitPerBlock;
|
| 1828 |
+
}
|
| 1829 |
+
else {
|
| 1830 |
+
if (!userSmemPreference) {
|
| 1831 |
+
userSmemPreference = 1 ;
|
| 1832 |
+
status = cudaOccAlignUpShmemSizeVoltaPlus(&userSmemPreference, properties);
|
| 1833 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1834 |
+
return status;
|
| 1835 |
+
}
|
| 1836 |
+
}
|
| 1837 |
+
sharedMemPerMultiprocessor = userSmemPreference;
|
| 1838 |
+
}
|
| 1839 |
+
|
| 1840 |
+
// Compute total shared memory available per SM
|
| 1841 |
+
//
|
| 1842 |
+
smemAvailableForDynamic = sharedMemPerMultiprocessor / numBlocks;
|
| 1843 |
+
smemAvailableForDynamic = (smemAvailableForDynamic / allocationGranularity) * allocationGranularity;
|
| 1844 |
+
|
| 1845 |
+
// Cap shared memory
|
| 1846 |
+
//
|
| 1847 |
+
if (smemAvailableForDynamic > smemLimitPerBlock) {
|
| 1848 |
+
smemAvailableForDynamic = smemLimitPerBlock;
|
| 1849 |
+
}
|
| 1850 |
+
|
| 1851 |
+
// Now compute dynamic shared memory size
|
| 1852 |
+
smemAvailableForDynamic = smemAvailableForDynamic - attributes->sharedSizeBytes;
|
| 1853 |
+
|
| 1854 |
+
// Cap computed dynamic SM by user requested limit specified via cuFuncSetAttribute()
|
| 1855 |
+
//
|
| 1856 |
+
if (smemAvailableForDynamic > attributes->maxDynamicSharedSizeBytes)
|
| 1857 |
+
smemAvailableForDynamic = attributes->maxDynamicSharedSizeBytes;
|
| 1858 |
+
|
| 1859 |
+
*bytesAvailable = smemAvailableForDynamic;
|
| 1860 |
+
return CUDA_OCC_SUCCESS;
|
| 1861 |
+
}
|
| 1862 |
+
|
| 1863 |
+
static __OCC_INLINE
|
| 1864 |
+
cudaOccError cudaOccMaxPotentialOccupancyBlockSize(
|
| 1865 |
+
int *minGridSize,
|
| 1866 |
+
int *blockSize,
|
| 1867 |
+
const cudaOccDeviceProp *properties,
|
| 1868 |
+
const cudaOccFuncAttributes *attributes,
|
| 1869 |
+
const cudaOccDeviceState *state,
|
| 1870 |
+
size_t (*blockSizeToDynamicSMemSize)(int),
|
| 1871 |
+
size_t dynamicSMemSize)
|
| 1872 |
+
{
|
| 1873 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 1874 |
+
cudaOccResult result;
|
| 1875 |
+
|
| 1876 |
+
// Limits
|
| 1877 |
+
int occupancyLimit;
|
| 1878 |
+
int granularity;
|
| 1879 |
+
int blockSizeLimit;
|
| 1880 |
+
|
| 1881 |
+
// Recorded maximum
|
| 1882 |
+
int maxBlockSize = 0;
|
| 1883 |
+
int numBlocks = 0;
|
| 1884 |
+
int maxOccupancy = 0;
|
| 1885 |
+
|
| 1886 |
+
// Temporary
|
| 1887 |
+
int blockSizeToTryAligned;
|
| 1888 |
+
int blockSizeToTry;
|
| 1889 |
+
int blockSizeLimitAligned;
|
| 1890 |
+
int occupancyInBlocks;
|
| 1891 |
+
int occupancyInThreads;
|
| 1892 |
+
|
| 1893 |
+
///////////////////////////
|
| 1894 |
+
// Check user input
|
| 1895 |
+
///////////////////////////
|
| 1896 |
+
|
| 1897 |
+
if (!minGridSize || !blockSize || !properties || !attributes || !state) {
|
| 1898 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 1899 |
+
}
|
| 1900 |
+
|
| 1901 |
+
status = cudaOccInputCheck(properties, attributes, state);
|
| 1902 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1903 |
+
return status;
|
| 1904 |
+
}
|
| 1905 |
+
|
| 1906 |
+
/////////////////////////////////////////////////////////////////////////////////
|
| 1907 |
+
// Try each block size, and pick the block size with maximum occupancy
|
| 1908 |
+
/////////////////////////////////////////////////////////////////////////////////
|
| 1909 |
+
|
| 1910 |
+
occupancyLimit = properties->maxThreadsPerMultiprocessor;
|
| 1911 |
+
granularity = properties->warpSize;
|
| 1912 |
+
|
| 1913 |
+
blockSizeLimit = __occMin(properties->maxThreadsPerBlock, attributes->maxThreadsPerBlock);
|
| 1914 |
+
blockSizeLimitAligned = __occRoundUp(blockSizeLimit, granularity);
|
| 1915 |
+
|
| 1916 |
+
for (blockSizeToTryAligned = blockSizeLimitAligned; blockSizeToTryAligned > 0; blockSizeToTryAligned -= granularity) {
|
| 1917 |
+
blockSizeToTry = __occMin(blockSizeLimit, blockSizeToTryAligned);
|
| 1918 |
+
|
| 1919 |
+
// Ignore dynamicSMemSize if the user provides a mapping
|
| 1920 |
+
//
|
| 1921 |
+
if (blockSizeToDynamicSMemSize) {
|
| 1922 |
+
dynamicSMemSize = (*blockSizeToDynamicSMemSize)(blockSizeToTry);
|
| 1923 |
+
}
|
| 1924 |
+
|
| 1925 |
+
status = cudaOccMaxActiveBlocksPerMultiprocessor(
|
| 1926 |
+
&result,
|
| 1927 |
+
properties,
|
| 1928 |
+
attributes,
|
| 1929 |
+
state,
|
| 1930 |
+
blockSizeToTry,
|
| 1931 |
+
dynamicSMemSize);
|
| 1932 |
+
|
| 1933 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 1934 |
+
return status;
|
| 1935 |
+
}
|
| 1936 |
+
|
| 1937 |
+
occupancyInBlocks = result.activeBlocksPerMultiprocessor;
|
| 1938 |
+
occupancyInThreads = blockSizeToTry * occupancyInBlocks;
|
| 1939 |
+
|
| 1940 |
+
if (occupancyInThreads > maxOccupancy) {
|
| 1941 |
+
maxBlockSize = blockSizeToTry;
|
| 1942 |
+
numBlocks = occupancyInBlocks;
|
| 1943 |
+
maxOccupancy = occupancyInThreads;
|
| 1944 |
+
}
|
| 1945 |
+
|
| 1946 |
+
// Early out if we have reached the maximum
|
| 1947 |
+
//
|
| 1948 |
+
if (occupancyLimit == maxOccupancy) {
|
| 1949 |
+
break;
|
| 1950 |
+
}
|
| 1951 |
+
}
|
| 1952 |
+
|
| 1953 |
+
///////////////////////////
|
| 1954 |
+
// Return best available
|
| 1955 |
+
///////////////////////////
|
| 1956 |
+
|
| 1957 |
+
// Suggested min grid size to achieve a full machine launch
|
| 1958 |
+
//
|
| 1959 |
+
*minGridSize = numBlocks * properties->numSms;
|
| 1960 |
+
*blockSize = maxBlockSize;
|
| 1961 |
+
|
| 1962 |
+
return status;
|
| 1963 |
+
}
|
| 1964 |
+
|
| 1965 |
+
|
| 1966 |
+
#if defined(__cplusplus)
|
| 1967 |
+
|
| 1968 |
+
namespace {
|
| 1969 |
+
|
| 1970 |
+
__OCC_INLINE
|
| 1971 |
+
cudaOccError cudaOccMaxPotentialOccupancyBlockSize(
|
| 1972 |
+
int *minGridSize,
|
| 1973 |
+
int *blockSize,
|
| 1974 |
+
const cudaOccDeviceProp *properties,
|
| 1975 |
+
const cudaOccFuncAttributes *attributes,
|
| 1976 |
+
const cudaOccDeviceState *state,
|
| 1977 |
+
size_t dynamicSMemSize)
|
| 1978 |
+
{
|
| 1979 |
+
return cudaOccMaxPotentialOccupancyBlockSize(
|
| 1980 |
+
minGridSize,
|
| 1981 |
+
blockSize,
|
| 1982 |
+
properties,
|
| 1983 |
+
attributes,
|
| 1984 |
+
state,
|
| 1985 |
+
NULL,
|
| 1986 |
+
dynamicSMemSize);
|
| 1987 |
+
}
|
| 1988 |
+
|
| 1989 |
+
template <typename UnaryFunction>
|
| 1990 |
+
__OCC_INLINE
|
| 1991 |
+
cudaOccError cudaOccMaxPotentialOccupancyBlockSizeVariableSMem(
|
| 1992 |
+
int *minGridSize,
|
| 1993 |
+
int *blockSize,
|
| 1994 |
+
const cudaOccDeviceProp *properties,
|
| 1995 |
+
const cudaOccFuncAttributes *attributes,
|
| 1996 |
+
const cudaOccDeviceState *state,
|
| 1997 |
+
UnaryFunction blockSizeToDynamicSMemSize)
|
| 1998 |
+
{
|
| 1999 |
+
cudaOccError status = CUDA_OCC_SUCCESS;
|
| 2000 |
+
cudaOccResult result;
|
| 2001 |
+
|
| 2002 |
+
// Limits
|
| 2003 |
+
int occupancyLimit;
|
| 2004 |
+
int granularity;
|
| 2005 |
+
int blockSizeLimit;
|
| 2006 |
+
|
| 2007 |
+
// Recorded maximum
|
| 2008 |
+
int maxBlockSize = 0;
|
| 2009 |
+
int numBlocks = 0;
|
| 2010 |
+
int maxOccupancy = 0;
|
| 2011 |
+
|
| 2012 |
+
// Temporary
|
| 2013 |
+
int blockSizeToTryAligned;
|
| 2014 |
+
int blockSizeToTry;
|
| 2015 |
+
int blockSizeLimitAligned;
|
| 2016 |
+
int occupancyInBlocks;
|
| 2017 |
+
int occupancyInThreads;
|
| 2018 |
+
size_t dynamicSMemSize;
|
| 2019 |
+
|
| 2020 |
+
///////////////////////////
|
| 2021 |
+
// Check user input
|
| 2022 |
+
///////////////////////////
|
| 2023 |
+
|
| 2024 |
+
if (!minGridSize || !blockSize || !properties || !attributes || !state) {
|
| 2025 |
+
return CUDA_OCC_ERROR_INVALID_INPUT;
|
| 2026 |
+
}
|
| 2027 |
+
|
| 2028 |
+
status = cudaOccInputCheck(properties, attributes, state);
|
| 2029 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 2030 |
+
return status;
|
| 2031 |
+
}
|
| 2032 |
+
|
| 2033 |
+
/////////////////////////////////////////////////////////////////////////////////
|
| 2034 |
+
// Try each block size, and pick the block size with maximum occupancy
|
| 2035 |
+
/////////////////////////////////////////////////////////////////////////////////
|
| 2036 |
+
|
| 2037 |
+
occupancyLimit = properties->maxThreadsPerMultiprocessor;
|
| 2038 |
+
granularity = properties->warpSize;
|
| 2039 |
+
blockSizeLimit = __occMin(properties->maxThreadsPerBlock, attributes->maxThreadsPerBlock);
|
| 2040 |
+
blockSizeLimitAligned = __occRoundUp(blockSizeLimit, granularity);
|
| 2041 |
+
|
| 2042 |
+
for (blockSizeToTryAligned = blockSizeLimitAligned; blockSizeToTryAligned > 0; blockSizeToTryAligned -= granularity) {
|
| 2043 |
+
blockSizeToTry = __occMin(blockSizeLimit, blockSizeToTryAligned);
|
| 2044 |
+
|
| 2045 |
+
dynamicSMemSize = blockSizeToDynamicSMemSize(blockSizeToTry);
|
| 2046 |
+
|
| 2047 |
+
status = cudaOccMaxActiveBlocksPerMultiprocessor(
|
| 2048 |
+
&result,
|
| 2049 |
+
properties,
|
| 2050 |
+
attributes,
|
| 2051 |
+
state,
|
| 2052 |
+
blockSizeToTry,
|
| 2053 |
+
dynamicSMemSize);
|
| 2054 |
+
|
| 2055 |
+
if (status != CUDA_OCC_SUCCESS) {
|
| 2056 |
+
return status;
|
| 2057 |
+
}
|
| 2058 |
+
|
| 2059 |
+
occupancyInBlocks = result.activeBlocksPerMultiprocessor;
|
| 2060 |
+
|
| 2061 |
+
occupancyInThreads = blockSizeToTry * occupancyInBlocks;
|
| 2062 |
+
|
| 2063 |
+
if (occupancyInThreads > maxOccupancy) {
|
| 2064 |
+
maxBlockSize = blockSizeToTry;
|
| 2065 |
+
numBlocks = occupancyInBlocks;
|
| 2066 |
+
maxOccupancy = occupancyInThreads;
|
| 2067 |
+
}
|
| 2068 |
+
|
| 2069 |
+
// Early out if we have reached the maximum
|
| 2070 |
+
//
|
| 2071 |
+
if (occupancyLimit == maxOccupancy) {
|
| 2072 |
+
break;
|
| 2073 |
+
}
|
| 2074 |
+
}
|
| 2075 |
+
|
| 2076 |
+
///////////////////////////
|
| 2077 |
+
// Return best available
|
| 2078 |
+
///////////////////////////
|
| 2079 |
+
|
| 2080 |
+
// Suggested min grid size to achieve a full machine launch
|
| 2081 |
+
//
|
| 2082 |
+
*minGridSize = numBlocks * properties->numSms;
|
| 2083 |
+
*blockSize = maxBlockSize;
|
| 2084 |
+
|
| 2085 |
+
return status;
|
| 2086 |
+
}
|
| 2087 |
+
|
| 2088 |
+
} // namespace anonymous
|
| 2089 |
+
|
| 2090 |
+
#endif /*__cplusplus */
|
| 2091 |
+
|
| 2092 |
+
#undef __OCC_INLINE
|
| 2093 |
+
|
| 2094 |
+
#endif /*__cuda_occupancy_h__*/
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cuda_pipeline_primitives.h
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#ifndef _CUDA_PIPELINE_PRIMITIVES_H_
|
| 51 |
+
# define _CUDA_PIPELINE_PRIMITIVES_H_
|
| 52 |
+
|
| 53 |
+
# include "cuda_pipeline_helpers.h"
|
| 54 |
+
|
| 55 |
+
// Dispatch a runtime (size_and_align, zfill) pair to the matching
// pipeline_memcpy_async<SIZE, SIZE - zfill> template instantiation.
//
//   dst_shared     - destination; asserted to be a shared-memory address
//                    aligned to size_and_align.
//   src_global     - source; asserted to be a global-memory address aligned
//                    to size_and_align.
//   size_and_align - must be 4, 8 or 16; any other value aborts.
//   zfill          - must be <= size_and_align; larger values abort.
_CUDA_PIPELINE_STATIC_QUALIFIER
void __pipeline_memcpy_async(void* __restrict__ dst_shared, const void* __restrict__ src_global, size_t size_and_align,
                             size_t zfill = 0)
{
    _CUDA_PIPELINE_ASSERT(size_and_align == 4 || size_and_align == 8 || size_and_align == 16);
    _CUDA_PIPELINE_ASSERT(zfill <= size_and_align);
    _CUDA_PIPELINE_ASSERT(__isShared(dst_shared));
    _CUDA_PIPELINE_ASSERT(__isGlobal(src_global));
    _CUDA_PIPELINE_ASSERT(!(reinterpret_cast<uintptr_t>(dst_shared) & (size_and_align - 1)));
    _CUDA_PIPELINE_ASSERT(!(reinterpret_cast<uintptr_t>(src_global) & (size_and_align - 1)));

    // Second template argument of the instantiation to select. The
    // arithmetic is unsigned, so zfill > size_and_align wraps to a value
    // that matches no case label and lands in the aborting default.
    const size_t remaining = size_and_align - zfill;

    switch (size_and_align) {
    case 16:
        switch (remaining) {
        case 16: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 16>(dst_shared, src_global); return;
        case 15: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 15>(dst_shared, src_global); return;
        case 14: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 14>(dst_shared, src_global); return;
        case 13: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 13>(dst_shared, src_global); return;
        case 12: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 12>(dst_shared, src_global); return;
        case 11: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 11>(dst_shared, src_global); return;
        case 10: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16, 10>(dst_shared, src_global); return;
        case 9:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  9>(dst_shared, src_global); return;
        case 8:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  8>(dst_shared, src_global); return;
        case 7:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  7>(dst_shared, src_global); return;
        case 6:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  6>(dst_shared, src_global); return;
        case 5:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  5>(dst_shared, src_global); return;
        case 4:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  4>(dst_shared, src_global); return;
        case 3:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  3>(dst_shared, src_global); return;
        case 2:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  2>(dst_shared, src_global); return;
        case 1:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  1>(dst_shared, src_global); return;
        case 0:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async<16,  0>(dst_shared, src_global); return;
        default: _CUDA_PIPELINE_ABORT();                                                                   return;
        }
    case 8:
        switch (remaining) {
        case 8:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  8>(dst_shared, src_global); return;
        case 7:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  7>(dst_shared, src_global); return;
        case 6:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  6>(dst_shared, src_global); return;
        case 5:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  5>(dst_shared, src_global); return;
        case 4:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  4>(dst_shared, src_global); return;
        case 3:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  3>(dst_shared, src_global); return;
        case 2:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  2>(dst_shared, src_global); return;
        case 1:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  1>(dst_shared, src_global); return;
        case 0:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 8,  0>(dst_shared, src_global); return;
        default: _CUDA_PIPELINE_ABORT();                                                                   return;
        }
    case 4:
        switch (remaining) {
        case 4:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 4,  4>(dst_shared, src_global); return;
        case 3:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 4,  3>(dst_shared, src_global); return;
        case 2:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 4,  2>(dst_shared, src_global); return;
        case 1:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 4,  1>(dst_shared, src_global); return;
        case 0:  _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_memcpy_async< 4,  0>(dst_shared, src_global); return;
        default: _CUDA_PIPELINE_ABORT();                                                                   return;
        }
    default:
        // Unsupported transfer size
        _CUDA_PIPELINE_ABORT();
        return;
    }
}
|
| 115 |
+
|
| 116 |
+
// Public entry point that forwards to pipeline_commit() in the internal
// pipeline namespace. Takes no arguments and returns nothing.
_CUDA_PIPELINE_STATIC_QUALIFIER
void __pipeline_commit()
{
    _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_commit();
}
|
| 121 |
+
|
| 122 |
+
// Dispatch a runtime `prior` count to the matching pipeline_wait_prior<N>
// instantiation. Values 0..7 map to N == prior; every larger value maps to
// N == 8.
_CUDA_PIPELINE_STATIC_QUALIFIER
void __pipeline_wait_prior(size_t prior)
{
    // Everything from 8 upwards shares one instantiation
    if (prior >= 8) {
        _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<8>();
        return;
    }

    switch (prior) {
    case 0: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<0>(); break;
    case 1: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<1>(); break;
    case 2: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<2>(); break;
    case 3: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<3>(); break;
    case 4: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<4>(); break;
    case 5: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<5>(); break;
    case 6: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<6>(); break;
    case 7: _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_wait_prior<7>(); break;
    }
}
|
| 137 |
+
|
| 138 |
+
# if defined(_CUDA_PIPELINE_ARCH_700_OR_LATER)
|
| 139 |
+
# include "cuda_awbarrier_primitives.h"
|
| 140 |
+
|
| 141 |
+
// Public entry point that forwards `barrier` unchanged to
// pipeline_arrive_on() in the internal pipeline namespace. Only compiled
// when _CUDA_PIPELINE_ARCH_700_OR_LATER is defined.
_CUDA_PIPELINE_STATIC_QUALIFIER
void __pipeline_arrive_on(__mbarrier_t* barrier)
{
    _CUDA_PIPELINE_INTERNAL_NAMESPACE::pipeline_arrive_on(barrier);
}
|
| 146 |
+
# endif
|
| 147 |
+
|
| 148 |
+
#endif /* !_CUDA_PIPELINE_PRIMITIVES_H_ */
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti.h
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2010-2017 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(_CUPTI_H_)
|
| 51 |
+
#define _CUPTI_H_
|
| 52 |
+
|
| 53 |
+
#ifdef _WIN32
|
| 54 |
+
#ifndef WIN32_LEAN_AND_MEAN
|
| 55 |
+
#define WIN32_LEAN_AND_MEAN
|
| 56 |
+
#endif
|
| 57 |
+
#ifdef NOMINMAX
|
| 58 |
+
#include <windows.h>
|
| 59 |
+
#else
|
| 60 |
+
#define NOMINMAX
|
| 61 |
+
#include <windows.h>
|
| 62 |
+
#undef NOMINMAX
|
| 63 |
+
#endif
|
| 64 |
+
#endif
|
| 65 |
+
|
| 66 |
+
#include <cuda.h>
|
| 67 |
+
#include <cupti_result.h>
|
| 68 |
+
#include <cupti_version.h>
|
| 69 |
+
|
| 70 |
+
/* Activity, callback, event and metric APIs */
|
| 71 |
+
#include <cupti_activity.h>
|
| 72 |
+
#include <cupti_callbacks.h>
|
| 73 |
+
#include <cupti_events.h>
|
| 74 |
+
#include <cupti_metrics.h>
|
| 75 |
+
|
| 76 |
+
/* Runtime, driver, and nvtx function identifiers */
|
| 77 |
+
#include <cupti_driver_cbid.h>
|
| 78 |
+
#include <cupti_runtime_cbid.h>
|
| 79 |
+
#include <cupti_nvtx_cbid.h>
|
| 80 |
+
|
| 81 |
+
/* To support function parameter structures for obsoleted API. See
|
| 82 |
+
cuda.h for the actual definition of these structures. */
|
| 83 |
+
typedef unsigned int CUdeviceptr_v1;
|
| 84 |
+
typedef struct CUDA_MEMCPY2D_v1_st { int dummy; } CUDA_MEMCPY2D_v1;
|
| 85 |
+
typedef struct CUDA_MEMCPY3D_v1_st { int dummy; } CUDA_MEMCPY3D_v1;
|
| 86 |
+
typedef struct CUDA_ARRAY_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY_DESCRIPTOR_v1;
|
| 87 |
+
typedef struct CUDA_ARRAY3D_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY3D_DESCRIPTOR_v1;
|
| 88 |
+
|
| 89 |
+
/* Function parameter structures */
|
| 90 |
+
#include <generated_cuda_runtime_api_meta.h>
|
| 91 |
+
#include <generated_cuda_meta.h>
|
| 92 |
+
|
| 93 |
+
/* The following parameter structures cannot be included unless a
|
| 94 |
+
header that defines GL_VERSION is included before including them.
|
| 95 |
+
If these are needed then make sure such a header is included
|
| 96 |
+
already. */
|
| 97 |
+
#ifdef GL_VERSION
|
| 98 |
+
#include <generated_cuda_gl_interop_meta.h>
|
| 99 |
+
#include <generated_cudaGL_meta.h>
|
| 100 |
+
#endif
|
| 101 |
+
|
| 102 |
+
//#include <generated_nvtx_meta.h>
|
| 103 |
+
|
| 104 |
+
/* The following parameter structures cannot be included by default as
|
| 105 |
+
they are not guaranteed to be available on all systems. Uncomment
|
| 106 |
+
the includes that are available, or use the include explicitly. */
|
| 107 |
+
#if defined(__linux__)
|
| 108 |
+
//#include <generated_cuda_vdpau_interop_meta.h>
|
| 109 |
+
//#include <generated_cudaVDPAU_meta.h>
|
| 110 |
+
#endif
|
| 111 |
+
|
| 112 |
+
#ifdef _WIN32
|
| 113 |
+
//#include <generated_cuda_d3d9_interop_meta.h>
|
| 114 |
+
//#include <generated_cuda_d3d10_interop_meta.h>
|
| 115 |
+
//#include <generated_cuda_d3d11_interop_meta.h>
|
| 116 |
+
//#include <generated_cudaD3D9_meta.h>
|
| 117 |
+
//#include <generated_cudaD3D10_meta.h>
|
| 118 |
+
//#include <generated_cudaD3D11_meta.h>
|
| 119 |
+
#endif
|
| 120 |
+
|
| 121 |
+
#endif /*_CUPTI_H_*/
|
| 122 |
+
|
| 123 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_events.h
ADDED
|
@@ -0,0 +1,1349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2010-2024 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(_CUPTI_EVENTS_H_)
|
| 51 |
+
#define _CUPTI_EVENTS_H_
|
| 52 |
+
|
| 53 |
+
#include <cuda.h>
|
| 54 |
+
#include <string.h>
|
| 55 |
+
#include <cuda_stdint.h>
|
| 56 |
+
#include <cupti_result.h>
|
| 57 |
+
|
| 58 |
+
#ifndef CUPTIAPI
|
| 59 |
+
#ifdef _WIN32
|
| 60 |
+
#define CUPTIAPI __stdcall
|
| 61 |
+
#else
|
| 62 |
+
#define CUPTIAPI
|
| 63 |
+
#endif
|
| 64 |
+
#endif
|
| 65 |
+
|
| 66 |
+
#if defined(__cplusplus)
|
| 67 |
+
extern "C" {
|
| 68 |
+
#endif
|
| 69 |
+
|
| 70 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 71 |
+
#pragma GCC visibility push(default)
|
| 72 |
+
#endif
|
| 73 |
+
|
| 74 |
+
/**
|
| 75 |
+
* \defgroup CUPTI_EVENT_API CUPTI Event API
|
| 76 |
+
* Functions, types, and enums that implement the CUPTI Event API.
|
| 77 |
+
*
|
| 78 |
+
* \note The CUPTI event API from the header cupti_events.h is not supported on devices
|
| 79 |
+
* with compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
|
| 80 |
+
* This API is deprecated in the CUDA 12.8 release and will be removed in a future CUDA release.
|
| 81 |
+
* This is replaced by the host profiling API in the header cupti_profiler_host.h and
|
| 82 |
+
* target profiling API in the header cupti_range_profiler.h which are supported on
|
| 83 |
+
* devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures).
|
| 84 |
+
*
|
| 85 |
+
* @{
|
| 86 |
+
*/
|
| 87 |
+
|
| 88 |
+
/**
|
| 89 |
+
* \brief ID for an event.
|
| 90 |
+
*
|
| 91 |
+
* An event represents a countable activity, action, or occurrence on
|
| 92 |
+
* the device.
|
| 93 |
+
*/
|
| 94 |
+
typedef uint32_t CUpti_EventID;
|
| 95 |
+
|
| 96 |
+
/**
|
| 97 |
+
* \brief ID for an event domain.
|
| 98 |
+
*
|
| 99 |
+
* ID for an event domain. An event domain represents a group of
|
| 100 |
+
* related events. A device may have multiple instances of a domain,
|
| 101 |
+
* indicating that the device can simultaneously record multiple
|
| 102 |
+
* instances of each event within that domain.
|
| 103 |
+
*/
|
| 104 |
+
typedef uint32_t CUpti_EventDomainID;
|
| 105 |
+
|
| 106 |
+
/**
|
| 107 |
+
* \brief A group of events.
|
| 108 |
+
*
|
| 109 |
+
* An event group is a collection of events that are managed
|
| 110 |
+
* together. All events in an event group must belong to the same
|
| 111 |
+
* domain.
|
| 112 |
+
*/
|
| 113 |
+
typedef void *CUpti_EventGroup;
|
| 114 |
+
|
| 115 |
+
/**
|
| 116 |
+
* \brief Device class.
|
| 117 |
+
*
|
| 118 |
+
* Enumeration of device classes for device attribute
|
| 119 |
+
* CUPTI_DEVICE_ATTR_DEVICE_CLASS.
|
| 120 |
+
*/
|
| 121 |
+
typedef enum {
|
| 122 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_TESLA = 0,
|
| 123 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_QUADRO = 1,
|
| 124 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_GEFORCE = 2,
|
| 125 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_TEGRA = 3,
|
| 126 |
+
} CUpti_DeviceAttributeDeviceClass;
|
| 127 |
+
|
| 128 |
+
/**
|
| 129 |
+
* \brief Device attributes.
|
| 130 |
+
*
|
| 131 |
+
* CUPTI device attributes. These attributes can be read using \ref
|
| 132 |
+
* cuptiDeviceGetAttribute.
|
| 133 |
+
*/
|
| 134 |
+
typedef enum {
|
| 135 |
+
/**
|
| 136 |
+
* Number of event IDs for a device. Value is a uint32_t.
|
| 137 |
+
*/
|
| 138 |
+
CUPTI_DEVICE_ATTR_MAX_EVENT_ID = 1,
|
| 139 |
+
/**
|
| 140 |
+
* Number of event domain IDs for a device. Value is a uint32_t.
|
| 141 |
+
*/
|
| 142 |
+
CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID = 2,
|
| 143 |
+
/**
|
| 144 |
+
* Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
|
| 145 |
+
*/
|
| 146 |
+
CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH = 3,
|
| 147 |
+
/**
|
| 148 |
+
* Get theoretical maximum number of instructions per cycle. Value
|
| 149 |
+
* is a uint32_t.
|
| 150 |
+
*/
|
| 151 |
+
CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE = 4,
|
| 152 |
+
/**
|
| 153 |
+
* Get theoretical maximum number of single precision instructions
|
| 154 |
+
* that can be executed per second. Value is a uint64_t.
|
| 155 |
+
*/
|
| 156 |
+
CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5,
|
| 157 |
+
/**
|
| 158 |
+
* Get number of frame buffers for device. Value is a uint64_t.
|
| 159 |
+
*/
|
| 160 |
+
CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS = 6,
|
| 161 |
+
/**
|
| 162 |
+
* Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type
|
| 163 |
+
* is non-PCIE. Value is a uint64_t.
|
| 164 |
+
*/
|
| 165 |
+
CUPTI_DEVICE_ATTR_PCIE_LINK_RATE = 7,
|
| 166 |
+
/**
|
| 167 |
+
* Get PCIE link width for device. Return 0 if bus-type
|
| 168 |
+
* is non-PCIE. Value is a uint64_t.
|
| 169 |
+
*/
|
| 170 |
+
CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH = 8,
|
| 171 |
+
/**
|
| 172 |
+
* Get PCIE generation for device. Return 0 if bus-type
|
| 173 |
+
* is non-PCIE. Value is a uint64_t.
|
| 174 |
+
*/
|
| 175 |
+
CUPTI_DEVICE_ATTR_PCIE_GEN = 9,
|
| 176 |
+
/**
|
| 177 |
+
* Get the class for the device. Value is a
|
| 178 |
+
* CUpti_DeviceAttributeDeviceClass.
|
| 179 |
+
*/
|
| 180 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS = 10,
|
| 181 |
+
/**
|
| 182 |
+
* Get the peak single precision flop per cycle. Value is a uint64_t.
|
| 183 |
+
*/
|
| 184 |
+
CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE = 11,
|
| 185 |
+
/**
|
| 186 |
+
* Get the peak double precision flop per cycle. Value is a uint64_t.
|
| 187 |
+
*/
|
| 188 |
+
CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE = 12,
|
| 189 |
+
/**
|
| 190 |
+
* Get number of L2 units. Value is a uint64_t.
|
| 191 |
+
*/
|
| 192 |
+
CUPTI_DEVICE_ATTR_MAX_L2_UNITS = 13,
|
| 193 |
+
/**
|
| 194 |
+
* Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED
|
| 195 |
+
* preference. Value is a uint64_t.
|
| 196 |
+
*/
|
| 197 |
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14,
|
| 198 |
+
/**
|
| 199 |
+
* Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1
|
| 200 |
+
* preference. Value is a uint64_t.
|
| 201 |
+
*/
|
| 202 |
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15,
|
| 203 |
+
/**
|
| 204 |
+
* Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL
|
| 205 |
+
* preference. Value is a uint64_t.
|
| 206 |
+
*/
|
| 207 |
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16,
|
| 208 |
+
/**
|
| 209 |
+
* Get the peak half precision flop per cycle. Value is a uint64_t.
|
| 210 |
+
*/
|
| 211 |
+
CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE = 17,
|
| 212 |
+
/**
|
| 213 |
+
* Check if Nvlink is connected to device. Returns 1, if at least one
|
| 214 |
+
* Nvlink is connected to the device, returns 0 otherwise.
|
| 215 |
+
* Value is a uint32_t.
|
| 216 |
+
*/
|
| 217 |
+
CUPTI_DEVICE_ATTR_NVLINK_PRESENT = 18,
|
| 218 |
+
/**
|
| 219 |
+
* Check if Nvlink is present between GPU and CPU. Returns Bandwidth,
|
| 220 |
+
* in Bytes/sec, if Nvlink is present, returns 0 otherwise.
|
| 221 |
+
* Value is a uint64_t.
|
| 222 |
+
*/
|
| 223 |
+
CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW = 19,
|
| 224 |
+
/**
|
| 225 |
+
* Check if NVSwitch is present in the underlying topology.
|
| 226 |
+
* Returns 1, if present, returns 0 otherwise.
|
| 227 |
+
* Value is a uint32_t.
|
| 228 |
+
*/
|
| 229 |
+
CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT = 20,
|
| 230 |
+
CUPTI_DEVICE_ATTR_FORCE_INT = 0x7fffffff,
|
| 231 |
+
} CUpti_DeviceAttribute;
|
| 232 |
+
|
| 233 |
+
/**
|
| 234 |
+
* \brief Event domain attributes.
|
| 235 |
+
*
|
| 236 |
+
* Event domain attributes. Except where noted, all the attributes can
|
| 237 |
+
* be read using either \ref cuptiDeviceGetEventDomainAttribute or
|
| 238 |
+
* \ref cuptiEventDomainGetAttribute.
|
| 239 |
+
*/
|
| 240 |
+
typedef enum {
|
| 241 |
+
/**
|
| 242 |
+
* Event domain name. Value is a null terminated const c-string.
|
| 243 |
+
*/
|
| 244 |
+
CUPTI_EVENT_DOMAIN_ATTR_NAME = 0,
|
| 245 |
+
/**
|
| 246 |
+
* Number of instances of the domain for which event counts will be
|
| 247 |
+
* collected. The domain may have additional instances that cannot
|
| 248 |
+
* be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT).
|
| 249 |
+
* Can be read only with \ref
|
| 250 |
+
* cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
|
| 251 |
+
*/
|
| 252 |
+
CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT = 1,
|
| 253 |
+
/**
|
| 254 |
+
* Total number of instances of the domain, including instances that
|
| 255 |
+
* cannot be profiled. Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT
|
| 256 |
+
* to get the number of instances that can be profiled. Can be read
|
| 257 |
+
* only with \ref cuptiDeviceGetEventDomainAttribute. Value is a
|
| 258 |
+
* uint32_t.
|
| 259 |
+
*/
|
| 260 |
+
CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3,
|
| 261 |
+
/**
|
| 262 |
+
* Collection method used for events contained in the event domain.
|
| 263 |
+
* Value is a \ref CUpti_EventCollectionMethod.
|
| 264 |
+
*/
|
| 265 |
+
CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD = 4,
|
| 266 |
+
|
| 267 |
+
CUPTI_EVENT_DOMAIN_ATTR_FORCE_INT = 0x7fffffff,
|
| 268 |
+
} CUpti_EventDomainAttribute;
|
| 269 |
+
|
| 270 |
+
/**
|
| 271 |
+
* \brief The collection method used for an event.
|
| 272 |
+
*
|
| 273 |
+
* The collection method indicates how an event is collected.
|
| 274 |
+
*/
|
| 275 |
+
typedef enum {
|
| 276 |
+
/**
|
| 277 |
+
* Event is collected using a hardware global performance monitor.
|
| 278 |
+
*/
|
| 279 |
+
CUPTI_EVENT_COLLECTION_METHOD_PM = 0,
|
| 280 |
+
/**
|
| 281 |
+
* Event is collected using a hardware SM performance monitor.
|
| 282 |
+
*/
|
| 283 |
+
CUPTI_EVENT_COLLECTION_METHOD_SM = 1,
|
| 284 |
+
/**
|
| 285 |
+
* Event is collected using software instrumentation.
|
| 286 |
+
*/
|
| 287 |
+
CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED = 2,
|
| 288 |
+
/**
|
| 289 |
+
* Event is collected using NvLink throughput counter method.
|
| 290 |
+
*/
|
| 291 |
+
CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC = 3,
|
| 292 |
+
CUPTI_EVENT_COLLECTION_METHOD_FORCE_INT = 0x7fffffff
|
| 293 |
+
} CUpti_EventCollectionMethod;
|
| 294 |
+
|
| 295 |
+
/**
|
| 296 |
+
* \brief Event group attributes.
|
| 297 |
+
*
|
| 298 |
+
* Event group attributes. These attributes can be read using \ref
|
| 299 |
+
* cuptiEventGroupGetAttribute. Attributes marked [rw] can also be
|
| 300 |
+
* written using \ref cuptiEventGroupSetAttribute.
|
| 301 |
+
*/
|
| 302 |
+
typedef enum {
|
| 303 |
+
/**
|
| 304 |
+
* The domain to which the event group is bound. This attribute is
|
| 305 |
+
* set when the first event is added to the group. Value is a
|
| 306 |
+
* CUpti_EventDomainID.
|
| 307 |
+
*/
|
| 308 |
+
CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID = 0,
|
| 309 |
+
/**
|
| 310 |
+
* [rw] Profile all the instances of the domain for this
|
| 311 |
+
* eventgroup. This feature can be used to get load balancing
|
| 312 |
+
* across all instances of a domain. Value is an integer.
|
| 313 |
+
*/
|
| 314 |
+
CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1,
|
| 315 |
+
/**
|
| 316 |
+
* [rw] Reserved for user data.
|
| 317 |
+
*/
|
| 318 |
+
CUPTI_EVENT_GROUP_ATTR_USER_DATA = 2,
|
| 319 |
+
/**
|
| 320 |
+
* Number of events in the group. Value is a uint32_t.
|
| 321 |
+
*/
|
| 322 |
+
CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS = 3,
|
| 323 |
+
/**
|
| 324 |
+
* Enumerates events in the group. Value is a pointer to buffer of
|
| 325 |
+
* size sizeof(CUpti_EventID) * num_of_events in the eventgroup.
|
| 326 |
+
* num_of_events can be queried using
|
| 327 |
+
* CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS.
|
| 328 |
+
*/
|
| 329 |
+
CUPTI_EVENT_GROUP_ATTR_EVENTS = 4,
|
| 330 |
+
/**
|
| 331 |
+
* Number of instances of the domain bound to this event group that
|
| 332 |
+
* will be counted. Value is a uint32_t.
|
| 333 |
+
*/
|
| 334 |
+
CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT = 5,
|
| 335 |
+
/**
|
| 336 |
+
* Event group scope can be set to CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
|
| 337 |
+
* CUPTI_EVENT_PROFILING_SCOPE_CONTEXT for an eventGroup, before
|
| 338 |
+
* adding any event.
|
| 339 |
+
* Sets the scope of eventgroup as CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
|
| 340 |
+
* CUPTI_EVENT_PROFILING_SCOPE_CONTEXT when the scope of the events
|
| 341 |
+
* that will be added is CUPTI_EVENT_PROFILING_SCOPE_BOTH.
|
| 342 |
+
* If profiling scope of event is either
|
| 343 |
+
* CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT
|
| 344 |
+
* then setting this attribute will not affect the default scope.
|
| 345 |
+
* It is not allowed to add events of different scope to same eventgroup.
|
| 346 |
+
* Value is a uint32_t.
|
| 347 |
+
*/
|
| 348 |
+
CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE = 6,
|
| 349 |
+
CUPTI_EVENT_GROUP_ATTR_FORCE_INT = 0x7fffffff,
|
| 350 |
+
} CUpti_EventGroupAttribute;
|
| 351 |
+
|
| 352 |
+
/**
|
| 353 |
+
* \brief Profiling scope for event.
|
| 354 |
+
*
|
| 355 |
+
* Profiling scope of event indicates if the event can be collected at context
|
| 356 |
+
* scope or device scope or both i.e. it can be collected at any of context or
|
| 357 |
+
* device scope.
|
| 358 |
+
*/
|
| 359 |
+
typedef enum {
|
| 360 |
+
/**
|
| 361 |
+
* Event is collected at context scope.
|
| 362 |
+
*/
|
| 363 |
+
CUPTI_EVENT_PROFILING_SCOPE_CONTEXT = 0,
|
| 364 |
+
/**
|
| 365 |
+
* Event is collected at device scope.
|
| 366 |
+
*/
|
| 367 |
+
CUPTI_EVENT_PROFILING_SCOPE_DEVICE = 1,
|
| 368 |
+
/**
|
| 369 |
+
* Event can be collected at device or context scope.
|
| 370 |
+
* The scope can be set using \ref cuptiEventGroupSetAttribute API.
|
| 371 |
+
*/
|
| 372 |
+
CUPTI_EVENT_PROFILING_SCOPE_BOTH = 2,
|
| 373 |
+
CUPTI_EVENT_PROFILING_SCOPE_FORCE_INT = 0x7fffffff
|
| 374 |
+
} CUpti_EventProfilingScope;
|
| 375 |
+
|
| 376 |
+
/**
|
| 377 |
+
* \brief Event attributes.
|
| 378 |
+
*
|
| 379 |
+
* Event attributes. These attributes can be read using \ref
|
| 380 |
+
* cuptiEventGetAttribute.
|
| 381 |
+
*/
|
| 382 |
+
typedef enum {
|
| 383 |
+
/**
|
| 384 |
+
* Event name. Value is a null terminated const c-string.
|
| 385 |
+
*/
|
| 386 |
+
CUPTI_EVENT_ATTR_NAME = 0,
|
| 387 |
+
/**
|
| 388 |
+
* Short description of event. Value is a null terminated const
|
| 389 |
+
* c-string.
|
| 390 |
+
*/
|
| 391 |
+
CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1,
|
| 392 |
+
/**
|
| 393 |
+
* Long description of event. Value is a null terminated const
|
| 394 |
+
* c-string.
|
| 395 |
+
*/
|
| 396 |
+
CUPTI_EVENT_ATTR_LONG_DESCRIPTION = 2,
|
| 397 |
+
/**
|
| 398 |
+
* Category of event. Value is CUpti_EventCategory.
|
| 399 |
+
*/
|
| 400 |
+
CUPTI_EVENT_ATTR_CATEGORY = 3,
|
| 401 |
+
/**
|
| 402 |
+
* Profiling scope of the events. It can be either device or context or both.
|
| 403 |
+
* Value is a \ref CUpti_EventProfilingScope.
|
| 404 |
+
*/
|
| 405 |
+
CUPTI_EVENT_ATTR_PROFILING_SCOPE = 5,
|
| 406 |
+
|
| 407 |
+
CUPTI_EVENT_ATTR_FORCE_INT = 0x7fffffff,
|
| 408 |
+
} CUpti_EventAttribute;
|
| 409 |
+
|
| 410 |
+
/**
|
| 411 |
+
* \brief Event collection modes.
|
| 412 |
+
*
|
| 413 |
+
* The event collection mode determines the period over which the
|
| 414 |
+
* events within the enabled event groups will be collected.
|
| 415 |
+
*/
|
| 416 |
+
typedef enum {
|
| 417 |
+
/**
|
| 418 |
+
* Events are collected for the entire duration between the
|
| 419 |
+
* cuptiEventGroupEnable and cuptiEventGroupDisable calls.
|
| 420 |
+
* Event values are reset when the events are read.
|
| 421 |
+
* For CUDA toolkit v6.0 and older this was the default mode.
|
| 422 |
+
*/
|
| 423 |
+
CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS = 0,
|
| 424 |
+
/**
|
| 425 |
+
* Events are collected only for the durations of kernel executions
|
| 426 |
+
* that occur between the cuptiEventGroupEnable and
|
| 427 |
+
* cuptiEventGroupDisable calls. Event collection begins when a
|
| 428 |
+
* kernel execution begins, and stops when kernel execution
|
| 429 |
+
* completes. Event values are reset to zero when each kernel
|
| 430 |
+
* execution begins. If multiple kernel executions occur between the
|
| 431 |
+
* cuptiEventGroupEnable and cuptiEventGroupDisable calls then the
|
| 432 |
+
* event values must be read after each kernel launch if those
|
| 433 |
+
* events need to be associated with the specific kernel launch.
|
| 434 |
+
* Note that collection in this mode may significantly change the
|
| 435 |
+
* overall performance characteristics of the application because
|
| 436 |
+
* kernel executions that occur between the cuptiEventGroupEnable and
|
| 437 |
+
* cuptiEventGroupDisable calls are serialized on the GPU.
|
| 438 |
+
* This is the default mode from CUDA toolkit v6.5 onwards.
|
| 439 |
+
*/
|
| 440 |
+
CUPTI_EVENT_COLLECTION_MODE_KERNEL = 1,
|
| 441 |
+
CUPTI_EVENT_COLLECTION_MODE_FORCE_INT = 0x7fffffff
|
| 442 |
+
} CUpti_EventCollectionMode;
|
| 443 |
+
|
| 444 |
+
/**
|
| 445 |
+
* \brief An event category.
|
| 446 |
+
*
|
| 447 |
+
* Each event is assigned to a category that represents the general
|
| 448 |
+
* type of the event. An event's category is accessed using \ref
|
| 449 |
+
* cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute.
|
| 450 |
+
*/
|
| 451 |
+
typedef enum {
|
| 452 |
+
/**
|
| 453 |
+
* An instruction related event.
|
| 454 |
+
*/
|
| 455 |
+
CUPTI_EVENT_CATEGORY_INSTRUCTION = 0,
|
| 456 |
+
/**
|
| 457 |
+
* A memory related event.
|
| 458 |
+
*/
|
| 459 |
+
CUPTI_EVENT_CATEGORY_MEMORY = 1,
|
| 460 |
+
/**
|
| 461 |
+
* A cache related event.
|
| 462 |
+
*/
|
| 463 |
+
CUPTI_EVENT_CATEGORY_CACHE = 2,
|
| 464 |
+
/**
|
| 465 |
+
* A profile-trigger event.
|
| 466 |
+
*/
|
| 467 |
+
CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3,
|
| 468 |
+
/**
|
| 469 |
+
* A system event.
|
| 470 |
+
*/
|
| 471 |
+
CUPTI_EVENT_CATEGORY_SYSTEM = 4,
|
| 472 |
+
CUPTI_EVENT_CATEGORY_FORCE_INT = 0x7fffffff
|
| 473 |
+
} CUpti_EventCategory;
|
| 474 |
+
|
| 475 |
+
/**
|
| 476 |
+
* \brief The overflow value for a CUPTI event.
|
| 477 |
+
*
|
| 478 |
+
* The CUPTI event value that indicates an overflow.
|
| 479 |
+
*/
|
| 480 |
+
#define CUPTI_EVENT_OVERFLOW ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
|
| 481 |
+
|
| 482 |
+
/**
|
| 483 |
+
* \brief The value that indicates the event value is invalid
|
| 484 |
+
*/
|
| 485 |
+
#define CUPTI_EVENT_INVALID ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
|
| 486 |
+
|
| 487 |
+
/**
|
| 488 |
+
* \brief Flags for cuptiEventGroupReadEvent and
|
| 489 |
+
* cuptiEventGroupReadAllEvents.
|
| 490 |
+
*
|
| 491 |
+
* Flags for \ref cuptiEventGroupReadEvent and \ref
|
| 492 |
+
* cuptiEventGroupReadAllEvents.
|
| 493 |
+
*/
|
| 494 |
+
typedef enum {
|
| 495 |
+
/**
|
| 496 |
+
* No flags.
|
| 497 |
+
*/
|
| 498 |
+
CUPTI_EVENT_READ_FLAG_NONE = 0,
|
| 499 |
+
CUPTI_EVENT_READ_FLAG_FORCE_INT = 0x7fffffff,
|
| 500 |
+
} CUpti_ReadEventFlags;
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
/**
|
| 504 |
+
* \brief A set of event groups.
|
| 505 |
+
*
|
| 506 |
+
* A set of event groups. When returned by \ref
|
| 507 |
+
* cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
|
| 508 |
+
* a set indicates the event groups that can be enabled at the same
|
| 509 |
+
* time (i.e. all the events in the set can be collected
|
| 510 |
+
* simultaneously).
|
| 511 |
+
*/
|
| 512 |
+
typedef struct {
|
| 513 |
+
/**
|
| 514 |
+
* The number of event groups in the set.
|
| 515 |
+
*/
|
| 516 |
+
uint32_t numEventGroups;
|
| 517 |
+
/**
|
| 518 |
+
* An array of \p numEventGroups event groups.
|
| 519 |
+
*/
|
| 520 |
+
CUpti_EventGroup *eventGroups;
|
| 521 |
+
} CUpti_EventGroupSet;
|
| 522 |
+
|
| 523 |
+
/**
|
| 524 |
+
* \brief A set of event group sets.
|
| 525 |
+
*
|
| 526 |
+
* A set of event group sets. When returned by \ref
|
| 527 |
+
* cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
|
| 528 |
+
* a CUpti_EventGroupSets indicates the number of passes required to
|
| 529 |
+
* collect all the events, and the event groups that should be
|
| 530 |
+
* collected during each pass.
|
| 531 |
+
*/
|
| 532 |
+
typedef struct {
|
| 533 |
+
/**
|
| 534 |
+
* Number of event group sets.
|
| 535 |
+
*/
|
| 536 |
+
uint32_t numSets;
|
| 537 |
+
/**
|
| 538 |
+
* An array of \p numSets event group sets.
|
| 539 |
+
*/
|
| 540 |
+
CUpti_EventGroupSet *sets;
|
| 541 |
+
} CUpti_EventGroupSets;
|
| 542 |
+
|
| 543 |
+
/**
|
| 544 |
+
* \brief Set the event collection mode.
|
| 545 |
+
*
|
| 546 |
+
* Set the event collection mode for a \p context. The \p mode
|
| 547 |
+
* controls the event collection behavior of all events in event
|
| 548 |
+
* groups created in the \p context. This API is invalid in kernel
|
| 549 |
+
* replay mode.
|
| 550 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 551 |
+
*
|
| 552 |
+
* \param context The context
|
| 553 |
+
* \param mode The event collection mode
|
| 554 |
+
*
|
| 555 |
+
* \retval CUPTI_SUCCESS
|
| 556 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 557 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 558 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if called when replay mode is enabled
|
| 559 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED if mode is not supported on the device
|
| 560 |
+
*/
|
| 561 |
+
|
| 562 |
+
CUptiResult CUPTIAPI cuptiSetEventCollectionMode(CUcontext context,
|
| 563 |
+
CUpti_EventCollectionMode mode);
|
| 564 |
+
|
| 565 |
+
/**
|
| 566 |
+
* \brief Read a device attribute.
|
| 567 |
+
*
|
| 568 |
+
* Read a device attribute and return it in \p *value.
|
| 569 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 570 |
+
*
|
| 571 |
+
* \param device The CUDA device
|
| 572 |
+
* \param attrib The attribute to read
|
| 573 |
+
* \param valueSize Size of the buffer pointed to by \p value, and
|
| 574 |
+
* returns the number of bytes written to \p value
|
| 575 |
+
* \param value Returns the value of the attribute
|
| 576 |
+
*
|
| 577 |
+
* \retval CUPTI_SUCCESS
|
| 578 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 579 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 580 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 581 |
+
* is NULL, or if \p attrib is not a device attribute
|
| 582 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 583 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 584 |
+
* to hold the attribute value.
|
| 585 |
+
*/
|
| 586 |
+
CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device,
|
| 587 |
+
CUpti_DeviceAttribute attrib,
|
| 588 |
+
size_t *valueSize,
|
| 589 |
+
void *value);
|
| 590 |
+
|
| 591 |
+
/**
|
| 592 |
+
* \brief Get the number of domains for a device.
|
| 593 |
+
*
|
| 594 |
+
* Returns the number of domains in \p numDomains for a device.
|
| 595 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 596 |
+
*
|
| 597 |
+
* \param device The CUDA device
|
| 598 |
+
* \param numDomains Returns the number of domains
|
| 599 |
+
*
|
| 600 |
+
* \retval CUPTI_SUCCESS
|
| 601 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 602 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 603 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
|
| 604 |
+
*/
|
| 605 |
+
CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device,
|
| 606 |
+
uint32_t *numDomains);
|
| 607 |
+
|
| 608 |
+
/**
|
| 609 |
+
* \brief Get the event domains for a device.
|
| 610 |
+
*
|
| 611 |
+
* Returns the event domains IDs in \p domainArray for a device. The
|
| 612 |
+
* size of the \p domainArray buffer is given by \p
|
| 613 |
+
* *arraySizeBytes. The size of the \p domainArray buffer must be at
|
| 614 |
+
* least \p numdomains * sizeof(CUpti_EventDomainID) or else not all
|
| 615 |
+
* domains will be returned. The value returned in \p
|
| 616 |
+
* *arraySizeBytes contains the number of bytes returned in \p
|
| 617 |
+
* domainArray.
|
| 618 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 619 |
+
*
|
| 620 |
+
* \param device The CUDA device
|
| 621 |
+
* \param arraySizeBytes The size of \p domainArray in bytes, and
|
| 622 |
+
* returns the number of bytes written to \p domainArray
|
| 623 |
+
* \param domainArray Returns the IDs of the event domains for the device
|
| 624 |
+
*
|
| 625 |
+
* \retval CUPTI_SUCCESS
|
| 626 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 627 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 628 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
|
| 629 |
+
* \p domainArray are NULL
|
| 630 |
+
*/
|
| 631 |
+
CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains(CUdevice device,
|
| 632 |
+
size_t *arraySizeBytes,
|
| 633 |
+
CUpti_EventDomainID *domainArray);
|
| 634 |
+
|
| 635 |
+
/**
|
| 636 |
+
* \brief Read an event domain attribute.
|
| 637 |
+
*
|
| 638 |
+
* Returns an event domain attribute in \p *value. The size of the \p
|
| 639 |
+
* value buffer is given by \p *valueSize. The value returned in \p
|
| 640 |
+
* *valueSize contains the number of bytes returned in \p value.
|
| 641 |
+
*
|
| 642 |
+
* If the attribute value is a c-string that is longer than \p
|
| 643 |
+
* *valueSize, then only the first \p *valueSize characters will be
|
| 644 |
+
* returned and there will be no terminating null byte.
|
| 645 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 646 |
+
*
|
| 647 |
+
* \param device The CUDA device
|
| 648 |
+
* \param eventDomain ID of the event domain
|
| 649 |
+
* \param attrib The event domain attribute to read
|
| 650 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 651 |
+
* returns the number of bytes written to \p value
|
| 652 |
+
* \param value Returns the attribute's value
|
| 653 |
+
*
|
| 654 |
+
* \retval CUPTI_SUCCESS
|
| 655 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 656 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 657 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 658 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 659 |
+
* is NULL, or if \p attrib is not an event domain attribute
|
| 660 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 661 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 662 |
+
* to hold the attribute value.
|
| 663 |
+
*/
|
| 664 |
+
CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute(CUdevice device,
|
| 665 |
+
CUpti_EventDomainID eventDomain,
|
| 666 |
+
CUpti_EventDomainAttribute attrib,
|
| 667 |
+
size_t *valueSize,
|
| 668 |
+
void *value);
|
| 669 |
+
|
| 670 |
+
/**
|
| 671 |
+
* \brief Get the number of event domains available on any device.
|
| 672 |
+
*
|
| 673 |
+
* Returns the total number of event domains available on any
|
| 674 |
+
* CUDA-capable device.
|
| 675 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 676 |
+
*
|
| 677 |
+
* \param numDomains Returns the number of domains
|
| 678 |
+
*
|
| 679 |
+
* \retval CUPTI_SUCCESS
|
| 680 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
|
| 681 |
+
*/
|
| 682 |
+
CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t *numDomains);
|
| 683 |
+
|
| 684 |
+
/**
|
| 685 |
+
* \brief Get the event domains available on any device.
|
| 686 |
+
*
|
| 687 |
+
* Returns all the event domains available on any CUDA-capable device.
|
| 688 |
+
* Event domain IDs are returned in \p domainArray. The size of the \p
|
| 689 |
+
* domainArray buffer is given by \p *arraySizeBytes. The size of the
|
| 690 |
+
* \p domainArray buffer must be at least \p numDomains *
|
| 691 |
+
* sizeof(CUpti_EventDomainID) or all domains will not be
|
| 692 |
+
* returned. The value returned in \p *arraySizeBytes contains the
|
| 693 |
+
* number of bytes returned in \p domainArray.
|
| 694 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 695 |
+
*
|
| 696 |
+
* \param arraySizeBytes The size of \p domainArray in bytes, and
|
| 697 |
+
* returns the number of bytes written to \p domainArray
|
| 698 |
+
* \param domainArray Returns all the event domains
|
| 699 |
+
*
|
| 700 |
+
* \retval CUPTI_SUCCESS
|
| 701 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
|
| 702 |
+
* \p domainArray are NULL
|
| 703 |
+
*/
|
| 704 |
+
CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes,
|
| 705 |
+
CUpti_EventDomainID *domainArray);
|
| 706 |
+
|
| 707 |
+
/**
|
| 708 |
+
* \brief Read an event domain attribute.
|
| 709 |
+
*
|
| 710 |
+
* Returns an event domain attribute in \p *value. The size of the \p
|
| 711 |
+
* value buffer is given by \p *valueSize. The value returned in \p
|
| 712 |
+
* *valueSize contains the number of bytes returned in \p value.
|
| 713 |
+
*
|
| 714 |
+
* If the attribute value is a c-string that is longer than \p
|
| 715 |
+
* *valueSize, then only the first \p *valueSize characters will be
|
| 716 |
+
* returned and there will be no terminating null byte.
|
| 717 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 718 |
+
*
|
| 719 |
+
* \param eventDomain ID of the event domain
|
| 720 |
+
* \param attrib The event domain attribute to read
|
| 721 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 722 |
+
* returns the number of bytes written to \p value
|
| 723 |
+
* \param value Returns the attribute's value
|
| 724 |
+
*
|
| 725 |
+
* \retval CUPTI_SUCCESS
|
| 726 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 727 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 728 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 729 |
+
* is NULL, or if \p attrib is not an event domain attribute
|
| 730 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 731 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 732 |
+
* to hold the attribute value.
|
| 733 |
+
*/
|
| 734 |
+
CUptiResult CUPTIAPI cuptiEventDomainGetAttribute(CUpti_EventDomainID eventDomain,
|
| 735 |
+
CUpti_EventDomainAttribute attrib,
|
| 736 |
+
size_t *valueSize,
|
| 737 |
+
void *value);
|
| 738 |
+
|
| 739 |
+
/**
|
| 740 |
+
* \brief Get number of events in a domain.
|
| 741 |
+
*
|
| 742 |
+
* Returns the number of events in \p numEvents for a domain.
|
| 743 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 744 |
+
*
|
| 745 |
+
* \param eventDomain ID of the event domain
|
| 746 |
+
* \param numEvents Returns the number of events in the domain
|
| 747 |
+
*
|
| 748 |
+
* \retval CUPTI_SUCCESS
|
| 749 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 750 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 751 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
|
| 752 |
+
*/
|
| 753 |
+
CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents(CUpti_EventDomainID eventDomain,
|
| 754 |
+
uint32_t *numEvents);
|
| 755 |
+
|
| 756 |
+
/**
|
| 757 |
+
* \brief Get the events in a domain.
|
| 758 |
+
*
|
| 759 |
+
* Returns the event IDs in \p eventArray for a domain. The size of
|
| 760 |
+
* the \p eventArray buffer is given by \p *arraySizeBytes. The size
|
| 761 |
+
* of the \p eventArray buffer must be at least \p numdomainevents *
|
| 762 |
+
* sizeof(CUpti_EventID) or else all events will not be returned. The
|
| 763 |
+
* value returned in \p *arraySizeBytes contains the number of bytes
|
| 764 |
+
* returned in \p eventArray.
|
| 765 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 766 |
+
*
|
| 767 |
+
* \param eventDomain ID of the event domain
|
| 768 |
+
* \param arraySizeBytes The size of \p eventArray in bytes, and
|
| 769 |
+
* returns the number of bytes written to \p eventArray
|
| 770 |
+
* \param eventArray Returns the IDs of the events in the domain
|
| 771 |
+
*
|
| 772 |
+
* \retval CUPTI_SUCCESS
|
| 773 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 774 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 775 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or \p
|
| 776 |
+
* eventArray are NULL
|
| 777 |
+
*/
|
| 778 |
+
CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain,
|
| 779 |
+
size_t *arraySizeBytes,
|
| 780 |
+
CUpti_EventID *eventArray);
|
| 781 |
+
|
| 782 |
+
/**
|
| 783 |
+
* \brief Get an event attribute.
|
| 784 |
+
*
|
| 785 |
+
* Returns an event attribute in \p *value. The size of the \p
|
| 786 |
+
* value buffer is given by \p *valueSize. The value returned in \p
|
| 787 |
+
* *valueSize contains the number of bytes returned in \p value.
|
| 788 |
+
*
|
| 789 |
+
* If the attribute value is a c-string that is longer than \p
|
| 790 |
+
* *valueSize, then only the first \p *valueSize characters will be
|
| 791 |
+
* returned and there will be no terminating null byte.
|
| 792 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 793 |
+
*
|
| 794 |
+
* \param event ID of the event
|
| 795 |
+
* \param attrib The event attribute to read
|
| 796 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 797 |
+
* returns the number of bytes written to \p value
|
| 798 |
+
* \param value Returns the attribute's value
|
| 799 |
+
*
|
| 800 |
+
* \retval CUPTI_SUCCESS
|
| 801 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 802 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 803 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 804 |
+
* is NULL, or if \p attrib is not an event attribute
|
| 805 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 806 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 807 |
+
* to hold the attribute value.
|
| 808 |
+
*/
|
| 809 |
+
CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event,
|
| 810 |
+
CUpti_EventAttribute attrib,
|
| 811 |
+
size_t *valueSize,
|
| 812 |
+
void *value);
|
| 813 |
+
|
| 814 |
+
/**
|
| 815 |
+
* \brief Find an event by name.
|
| 816 |
+
*
|
| 817 |
+
* Find an event by name and return the event ID in \p *event.
|
| 818 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 819 |
+
*
|
| 820 |
+
* \param device The CUDA device
|
| 821 |
+
* \param eventName The name of the event to find
|
| 822 |
+
* \param event Returns the ID of the found event or undefined if
|
| 823 |
+
* unable to find the event
|
| 824 |
+
*
|
| 825 |
+
* \retval CUPTI_SUCCESS
|
| 826 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 827 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 828 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_NAME if unable to find an event
|
| 829 |
+
* with name \p eventName. In this case \p *event is undefined
|
| 830 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventName or \p event are NULL
|
| 831 |
+
*/
|
| 832 |
+
CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device,
|
| 833 |
+
const char *eventName,
|
| 834 |
+
CUpti_EventID *event);
|
| 835 |
+
|
| 836 |
+
/**
|
| 837 |
+
* \brief Create a new event group for a context.
|
| 838 |
+
*
|
| 839 |
+
* Creates a new event group for \p context and returns the new group
|
| 840 |
+
* in \p *eventGroup.
|
| 841 |
+
* \note \p flags are reserved for future use and should be set to zero.
|
| 842 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 843 |
+
*
|
| 844 |
+
* \param context The context for the event group
|
| 845 |
+
* \param eventGroup Returns the new event group
|
| 846 |
+
* \param flags Reserved - must be zero
|
| 847 |
+
*
|
| 848 |
+
* \retval CUPTI_SUCCESS
|
| 849 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 850 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 851 |
+
* \retval CUPTI_ERROR_OUT_OF_MEMORY
|
| 852 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 853 |
+
*/
|
| 854 |
+
CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context,
|
| 855 |
+
CUpti_EventGroup *eventGroup,
|
| 856 |
+
uint32_t flags);
|
| 857 |
+
|
| 858 |
+
/**
|
| 859 |
+
* \brief Destroy an event group.
|
| 860 |
+
*
|
| 861 |
+
* Destroy an \p eventGroup and free its resources. An event group
|
| 862 |
+
* cannot be destroyed if it is enabled.
|
| 863 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 864 |
+
*
|
| 865 |
+
* \param eventGroup The event group to destroy
|
| 866 |
+
*
|
| 867 |
+
* \retval CUPTI_SUCCESS
|
| 868 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 869 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if the event group is enabled
|
| 870 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 871 |
+
*/
|
| 872 |
+
CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup);
|
| 873 |
+
|
| 874 |
+
/**
|
| 875 |
+
* \brief Read an event group attribute.
|
| 876 |
+
*
|
| 877 |
+
* Read an event group attribute and return it in \p *value.
|
| 878 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 879 |
+
* must guard against simultaneous destruction or modification of \p
|
| 880 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 881 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 882 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 883 |
+
* context in which \p eventGroup was created (for example, client
|
| 884 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 885 |
+
* cuCtxDestroy, etc.).
|
| 886 |
+
*
|
| 887 |
+
* \param eventGroup The event group
|
| 888 |
+
* \param attrib The attribute to read
|
| 889 |
+
* \param valueSize Size of the buffer pointed to by \p value, and
|
| 890 |
+
* returns the number of bytes written to \p value
|
| 891 |
+
* \param value Returns the value of the attribute
|
| 892 |
+
*
|
| 893 |
+
* \retval CUPTI_SUCCESS
|
| 894 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 895 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 896 |
+
* is NULL, or if \p attrib is not an event group attribute
|
| 897 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 898 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 899 |
+
* to hold the attribute value.
|
| 900 |
+
*/
|
| 901 |
+
CUptiResult CUPTIAPI cuptiEventGroupGetAttribute(CUpti_EventGroup eventGroup,
|
| 902 |
+
CUpti_EventGroupAttribute attrib,
|
| 903 |
+
size_t *valueSize,
|
| 904 |
+
void *value);
|
| 905 |
+
|
| 906 |
+
/**
|
| 907 |
+
* \brief Write an event group attribute.
|
| 908 |
+
*
|
| 909 |
+
* Write an event group attribute.
|
| 910 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 911 |
+
*
|
| 912 |
+
* \param eventGroup The event group
|
| 913 |
+
* \param attrib The attribute to write
|
| 914 |
+
* \param valueSize The size, in bytes, of the value
|
| 915 |
+
* \param value The attribute value to write
|
| 916 |
+
*
|
| 917 |
+
* \retval CUPTI_SUCCESS
|
| 918 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 919 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 920 |
+
* is NULL, or if \p attrib is not an event group attribute, or if
|
| 921 |
+
* \p attrib is not a writable attribute
|
| 922 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that
|
| 923 |
+
* the \p value buffer is too small to hold the attribute value.
|
| 924 |
+
*/
|
| 925 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetAttribute(CUpti_EventGroup eventGroup,
|
| 926 |
+
CUpti_EventGroupAttribute attrib,
|
| 927 |
+
size_t valueSize,
|
| 928 |
+
void *value);
|
| 929 |
+
|
| 930 |
+
/**
|
| 931 |
+
* \brief Add an event to an event group.
|
| 932 |
+
*
|
| 933 |
+
* Add an event to an event group. The event add can fail for a number of reasons:
|
| 934 |
+
* \li The event group is enabled
|
| 935 |
+
* \li The event does not belong to the same event domain as the
|
| 936 |
+
* events that are already in the event group
|
| 937 |
+
* \li Device limitations on the events that can belong to the same group
|
| 938 |
+
* \li The event group is full
|
| 939 |
+
*
|
| 940 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 941 |
+
*
|
| 942 |
+
* \param eventGroup The event group
|
| 943 |
+
* \param event The event to add to the group
|
| 944 |
+
*
|
| 945 |
+
* \retval CUPTI_SUCCESS
|
| 946 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 947 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 948 |
+
* \retval CUPTI_ERROR_OUT_OF_MEMORY
|
| 949 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
|
| 950 |
+
* \retval CUPTI_ERROR_NOT_COMPATIBLE if \p event belongs to a
|
| 951 |
+
* different event domain than the events already in \p eventGroup, or
|
| 952 |
+
* if a device limitation prevents \p event from being collected at
|
| 953 |
+
* the same time as the events already in \p eventGroup
|
| 954 |
+
* \retval CUPTI_ERROR_MAX_LIMIT_REACHED if \p eventGroup is full
|
| 955 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 956 |
+
*/
|
| 957 |
+
CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup,
|
| 958 |
+
CUpti_EventID event);
|
| 959 |
+
|
| 960 |
+
/**
|
| 961 |
+
* \brief Remove an event from an event group.
|
| 962 |
+
*
|
| 963 |
+
* Remove \p event from an event group. The event cannot be
|
| 964 |
+
* removed if the event group is enabled.
|
| 965 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 966 |
+
*
|
| 967 |
+
* \param eventGroup The event group
|
| 968 |
+
* \param event The event to remove from the group
|
| 969 |
+
*
|
| 970 |
+
* \retval CUPTI_SUCCESS
|
| 971 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 972 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 973 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
|
| 974 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 975 |
+
*/
|
| 976 |
+
CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup,
|
| 977 |
+
CUpti_EventID event);
|
| 978 |
+
|
| 979 |
+
/**
|
| 980 |
+
* \brief Remove all events from an event group.
|
| 981 |
+
*
|
| 982 |
+
* Remove all events from an event group. Events cannot be removed if
|
| 983 |
+
* the event group is enabled.
|
| 984 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 985 |
+
*
|
| 986 |
+
* \param eventGroup The event group
|
| 987 |
+
*
|
| 988 |
+
* \retval CUPTI_SUCCESS
|
| 989 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 990 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
|
| 991 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 992 |
+
*/
|
| 993 |
+
CUptiResult CUPTIAPI cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup);
|
| 994 |
+
|
| 995 |
+
/**
|
| 996 |
+
* \brief Zero all the event counts in an event group.
|
| 997 |
+
*
|
| 998 |
+
* Zero all the event counts in an event group.
|
| 999 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 1000 |
+
* must guard against simultaneous destruction or modification of \p
|
| 1001 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 1002 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 1003 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 1004 |
+
* context in which \p eventGroup was created (for example, client
|
| 1005 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 1006 |
+
* cuCtxDestroy, etc.).
|
| 1007 |
+
*
|
| 1008 |
+
* \param eventGroup The event group
|
| 1009 |
+
*
|
| 1010 |
+
* \retval CUPTI_SUCCESS
|
| 1011 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1012 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1013 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1014 |
+
*/
|
| 1015 |
+
CUptiResult CUPTIAPI cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup);
|
| 1016 |
+
|
| 1017 |
+
/**
|
| 1018 |
+
* \brief Enable an event group.
|
| 1019 |
+
*
|
| 1020 |
+
* Enable an event group. Enabling an event group zeros the value of
|
| 1021 |
+
* all the events in the group and then starts collection of those
|
| 1022 |
+
* events.
|
| 1023 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1024 |
+
*
|
| 1025 |
+
* \param eventGroup The event group
|
| 1026 |
+
*
|
| 1027 |
+
* \retval CUPTI_SUCCESS
|
| 1028 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1029 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1030 |
+
* \retval CUPTI_ERROR_NOT_READY if \p eventGroup does not contain any events
|
| 1031 |
+
* \retval CUPTI_ERROR_NOT_COMPATIBLE if \p eventGroup cannot be
|
| 1032 |
+
* enabled due to other already enabled event groups
|
| 1033 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1034 |
+
* \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling
|
| 1035 |
+
* and hardware is busy
|
| 1036 |
+
*/
|
| 1037 |
+
CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup);
|
| 1038 |
+
|
| 1039 |
+
/**
|
| 1040 |
+
* \brief Disable an event group.
|
| 1041 |
+
*
|
| 1042 |
+
* Disable an event group. Disabling an event group stops collection
|
| 1043 |
+
* of events contained in the group.
|
| 1044 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1045 |
+
*
|
| 1046 |
+
* \param eventGroup The event group
|
| 1047 |
+
*
|
| 1048 |
+
* \retval CUPTI_SUCCESS
|
| 1049 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1050 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1051 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1052 |
+
*/
|
| 1053 |
+
CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup);
|
| 1054 |
+
|
| 1055 |
+
/**
|
| 1056 |
+
* \brief Read the value for an event in an event group.
|
| 1057 |
+
*
|
| 1058 |
+
* Read the value for an event in an event group. The event value is
|
| 1059 |
+
* returned in the \p eventValueBuffer buffer. \p
|
| 1060 |
+
* eventValueBufferSizeBytes indicates the size of the \p
|
| 1061 |
+
* eventValueBuffer buffer. The buffer must be at least sizeof(uint64_t)
|
| 1062 |
+
* if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set
|
| 1063 |
+
* on the group containing the event. The buffer must be at least
|
| 1064 |
+
* (sizeof(uint64_t) * number of domain instances) if
|
| 1065 |
+
* ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the
|
| 1066 |
+
* group.
|
| 1067 |
+
*
|
| 1068 |
+
* If any instance of an event counter overflows, the value returned
|
| 1069 |
+
* for that event instance will be ::CUPTI_EVENT_OVERFLOW.
|
| 1070 |
+
*
|
| 1071 |
+
* The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
|
| 1072 |
+
*
|
| 1073 |
+
* Reading an event from a disabled event group is not allowed. After
|
| 1074 |
+
* being read, an event's value is reset to zero.
|
| 1075 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 1076 |
+
* must guard against simultaneous destruction or modification of \p
|
| 1077 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 1078 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 1079 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 1080 |
+
* context in which \p eventGroup was created (for example, client
|
| 1081 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 1082 |
+
* cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
|
| 1083 |
+
* called simultaneously with this function, then returned event
|
| 1084 |
+
* values are undefined.
|
| 1085 |
+
*
|
| 1086 |
+
* \param eventGroup The event group
|
| 1087 |
+
* \param flags Flags controlling the reading mode
|
| 1088 |
+
* \param event The event to read
|
| 1089 |
+
* \param eventValueBufferSizeBytes The size of \p eventValueBuffer
|
| 1090 |
+
* in bytes, and returns the number of bytes written to \p
|
| 1091 |
+
* eventValueBuffer
|
| 1092 |
+
* \param eventValueBuffer Returns the event value(s)
|
| 1093 |
+
*
|
| 1094 |
+
* \retval CUPTI_SUCCESS
|
| 1095 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1096 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 1097 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1098 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
|
| 1099 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
|
| 1100 |
+
* eventValueBufferSizeBytes or \p eventValueBuffer is NULL
|
| 1101 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
|
| 1102 |
+
* is not sufficient
|
| 1103 |
+
*/
|
| 1104 |
+
CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup,
|
| 1105 |
+
CUpti_ReadEventFlags flags,
|
| 1106 |
+
CUpti_EventID event,
|
| 1107 |
+
size_t *eventValueBufferSizeBytes,
|
| 1108 |
+
uint64_t *eventValueBuffer);
|
| 1109 |
+
|
| 1110 |
+
/**
|
| 1111 |
+
* \brief Read the values for all the events in an event group.
|
| 1112 |
+
*
|
| 1113 |
+
* Read the values for all the events in an event group. The event
|
| 1114 |
+
* values are returned in the \p eventValueBuffer buffer. \p
|
| 1115 |
+
* eventValueBufferSizeBytes indicates the size of \p
|
| 1116 |
+
* eventValueBuffer. The buffer must be at least (sizeof(uint64) *
|
| 1117 |
+
* number of events in group) if
|
| 1118 |
+
* ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on
|
| 1119 |
+
* the group containing the events. The buffer must be at least
|
| 1120 |
+
* (sizeof(uint64) * number of domain instances * number of events in
|
| 1121 |
+
* group) if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is
|
| 1122 |
+
* set on the group.
|
| 1123 |
+
*
|
| 1124 |
+
* The data format returned in \p eventValueBuffer is:
|
| 1125 |
+
* - domain instance 0: event0 event1 ... eventN
|
| 1126 |
+
* - domain instance 1: event0 event1 ... eventN
|
| 1127 |
+
* - ...
|
| 1128 |
+
* - domain instance M: event0 event1 ... eventN
|
| 1129 |
+
*
|
| 1130 |
+
* The event order in \p eventValueBuffer is returned in \p
|
| 1131 |
+
* eventIdArray. The size of \p eventIdArray is specified in \p
|
| 1132 |
+
* eventIdArraySizeBytes. The size should be at least
|
| 1133 |
+
* (sizeof(CUpti_EventID) * number of events in group).
|
| 1134 |
+
*
|
| 1135 |
+
* If any instance of any event counter overflows, the value returned
|
| 1136 |
+
* for that event instance will be ::CUPTI_EVENT_OVERFLOW.
|
| 1137 |
+
*
|
| 1138 |
+
* The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
|
| 1139 |
+
*
|
| 1140 |
+
* Reading events from a disabled event group is not allowed. After
|
| 1141 |
+
* being read, an event's value is reset to zero.
|
| 1142 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 1143 |
+
* must guard against simultaneous destruction or modification of \p
|
| 1144 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 1145 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 1146 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 1147 |
+
* context in which \p eventGroup was created (for example, client
|
| 1148 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 1149 |
+
* cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
|
| 1150 |
+
* called simultaneously with this function, then returned event
|
| 1151 |
+
* values are undefined.
|
| 1152 |
+
*
|
| 1153 |
+
* \param eventGroup The event group
|
| 1154 |
+
* \param flags Flags controlling the reading mode
|
| 1155 |
+
* \param eventValueBufferSizeBytes The size of \p eventValueBuffer in
|
| 1156 |
+
* bytes, and returns the number of bytes written to \p
|
| 1157 |
+
* eventValueBuffer
|
| 1158 |
+
* \param eventValueBuffer Returns the event values
|
| 1159 |
+
* \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
|
| 1160 |
+
* and returns the number of bytes written to \p eventIdArray
|
| 1161 |
+
* \param eventIdArray Returns the IDs of the events in the same order
|
| 1162 |
+
* as the values returned in \p eventValueBuffer.
|
| 1163 |
+
* \param numEventIdsRead Returns the number of event IDs returned
|
| 1164 |
+
* in \p eventIdArray
|
| 1165 |
+
*
|
| 1166 |
+
* \retval CUPTI_SUCCESS
|
| 1167 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1168 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1169 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
|
| 1170 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
|
| 1171 |
+
* eventValueBufferSizeBytes, \p eventValueBuffer, \p
|
| 1172 |
+
* eventIdArraySizeBytes, \p eventIdArray or \p numEventIdsRead is
|
| 1173 |
+
* NULL
|
| 1174 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
|
| 1175 |
+
* or \p eventIdArray is not sufficient
|
| 1176 |
+
*/
|
| 1177 |
+
CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents(CUpti_EventGroup eventGroup,
|
| 1178 |
+
CUpti_ReadEventFlags flags,
|
| 1179 |
+
size_t *eventValueBufferSizeBytes,
|
| 1180 |
+
uint64_t *eventValueBuffer,
|
| 1181 |
+
size_t *eventIdArraySizeBytes,
|
| 1182 |
+
CUpti_EventID *eventIdArray,
|
| 1183 |
+
size_t *numEventIdsRead);
|
| 1184 |
+
|
| 1185 |
+
/**
|
| 1186 |
+
* \brief For a set of events, get the grouping that indicates the
|
| 1187 |
+
* number of passes and the event groups necessary to collect the
|
| 1188 |
+
* events.
|
| 1189 |
+
*
|
| 1190 |
+
* The number of events that can be collected simultaneously varies by
|
| 1191 |
+
* device and by the type of the events. When events can be collected
|
| 1192 |
+
* simultaneously, they may need to be grouped into multiple event
|
| 1193 |
+
* groups because they are from different event domains. This function
|
| 1194 |
+
* takes a set of events and determines how many passes are required
|
| 1195 |
+
* to collect all those events, and which events can be collected
|
| 1196 |
+
* simultaneously in each pass.
|
| 1197 |
+
*
|
| 1198 |
+
* The CUpti_EventGroupSets returned in \p eventGroupPasses indicates
|
| 1199 |
+
* how many passes are required to collect the events with the \p
|
| 1200 |
+
* numSets field. Within each event group set, the \p sets array
|
| 1201 |
+
* indicates the event groups that should be collected on each pass.
|
| 1202 |
+
* \note \b Thread-safety: this function is thread safe, but client
|
| 1203 |
+
* must guard against another thread simultaneously destroying \p
|
| 1204 |
+
* context.
|
| 1205 |
+
*
|
| 1206 |
+
* \param context The context for event collection
|
| 1207 |
+
* \param eventIdArraySizeBytes Size of \p eventIdArray in bytes
|
| 1208 |
+
* \param eventIdArray Array of event IDs that need to be grouped
|
| 1209 |
+
* \param eventGroupPasses Returns a CUpti_EventGroupSets object that
|
| 1210 |
+
* indicates the number of passes required to collect the events and
|
| 1211 |
+
* the events to collect on each pass
|
| 1212 |
+
*
|
| 1213 |
+
* \retval CUPTI_SUCCESS
|
| 1214 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1215 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 1216 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 1217 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArray or
|
| 1218 |
+
* \p eventGroupPasses is NULL
|
| 1219 |
+
*/
|
| 1220 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetsCreate(CUcontext context,
|
| 1221 |
+
size_t eventIdArraySizeBytes,
|
| 1222 |
+
CUpti_EventID *eventIdArray,
|
| 1223 |
+
CUpti_EventGroupSets **eventGroupPasses);
|
| 1224 |
+
|
| 1225 |
+
/**
|
| 1226 |
+
* \brief Destroy an event group sets object.
|
| 1227 |
+
*
|
| 1228 |
+
* Destroy a CUpti_EventGroupSets object.
|
| 1229 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1230 |
+
*
|
| 1231 |
+
* \param eventGroupSets The object to destroy
|
| 1232 |
+
*
|
| 1233 |
+
* \retval CUPTI_SUCCESS
|
| 1234 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1235 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if any of the event groups
|
| 1236 |
+
* contained in the sets is enabled
|
| 1237 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSets is NULL
|
| 1238 |
+
*/
|
| 1239 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets);
|
| 1240 |
+
|
| 1241 |
+
|
| 1242 |
+
/**
|
| 1243 |
+
* \brief Enable an event group set.
|
| 1244 |
+
*
|
| 1245 |
+
* Enable a set of event groups. Enabling a set of event groups zeros the value of
|
| 1246 |
+
* all the events in all the groups and then starts collection of those events.
|
| 1247 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1248 |
+
*
|
| 1249 |
+
* \param eventGroupSet The pointer to the event group set
|
| 1250 |
+
*
|
| 1251 |
+
* \retval CUPTI_SUCCESS
|
| 1252 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1253 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1254 |
+
* \retval CUPTI_ERROR_NOT_READY if an event group in \p eventGroupSet does not contain any events
|
| 1255 |
+
* \retval CUPTI_ERROR_NOT_COMPATIBLE if an event group in \p eventGroupSet cannot be
|
| 1256 |
+
* enabled due to other already enabled event groups
|
| 1257 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
|
| 1258 |
+
* \retval CUPTI_ERROR_HARDWARE_BUSY if other client is profiling and hardware is
|
| 1259 |
+
* busy
|
| 1260 |
+
*/
|
| 1261 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet);
|
| 1262 |
+
|
| 1263 |
+
/**
|
| 1264 |
+
* \brief Disable an event group set.
|
| 1265 |
+
*
|
| 1266 |
+
* Disable a set of event groups. Disabling a set of event groups
|
| 1267 |
+
* stops collection of events contained in the groups.
|
| 1268 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1269 |
+
* \note \b If this call fails, some of the event groups in the set may be disabled
|
| 1270 |
+
* and other event groups may remain enabled.
|
| 1271 |
+
*
|
| 1272 |
+
* \param eventGroupSet The pointer to the event group set
|
| 1273 |
+
* \retval CUPTI_SUCCESS
|
| 1274 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1275 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1276 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
|
| 1277 |
+
*/
|
| 1278 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet);
|
| 1279 |
+
|
| 1280 |
+
/**
|
| 1281 |
+
* \brief Enable kernel replay mode.
|
| 1282 |
+
*
|
| 1283 |
+
* Set profiling mode for the context to replay mode. In this mode,
|
| 1284 |
+
* any number of events can be collected in one run of the kernel. The
|
| 1285 |
+
* event collection mode will automatically switch to
|
| 1286 |
+
* CUPTI_EVENT_COLLECTION_MODE_KERNEL. In this mode, \ref
|
| 1287 |
+
* cuptiSetEventCollectionMode will return
|
| 1288 |
+
* CUPTI_ERROR_INVALID_OPERATION.
|
| 1289 |
+
* \note \b Kernels might take longer to run if many events are enabled.
|
| 1290 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1291 |
+
*
|
| 1292 |
+
* \param context The context
|
| 1293 |
+
* \retval CUPTI_SUCCESS
|
| 1294 |
+
*/
|
| 1295 |
+
CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context);
|
| 1296 |
+
|
| 1297 |
+
/**
|
| 1298 |
+
* \brief Disable kernel replay mode.
|
| 1299 |
+
*
|
| 1300 |
+
* Set profiling mode for the context to non-replay (default)
|
| 1301 |
+
* mode. Event collection mode will be set to
|
| 1302 |
+
* CUPTI_EVENT_COLLECTION_MODE_KERNEL. All previously enabled
|
| 1303 |
+
* event groups and event group sets will be disabled.
|
| 1304 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1305 |
+
*
|
| 1306 |
+
* \param context The context
|
| 1307 |
+
* \retval CUPTI_SUCCESS
|
| 1308 |
+
*/
|
| 1309 |
+
CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context);
|
| 1310 |
+
|
| 1311 |
+
/**
|
| 1312 |
+
* \brief Function type for getting updates on kernel replay.
|
| 1313 |
+
*
|
| 1314 |
+
* \param kernelName The mangled kernel name
|
| 1315 |
+
* \param numReplaysDone Number of replays done so far
|
| 1316 |
+
* \param customData Pointer of any custom data passed in when subscribing
|
| 1317 |
+
*/
|
| 1318 |
+
typedef void (CUPTIAPI *CUpti_KernelReplayUpdateFunc)(
|
| 1319 |
+
const char *kernelName,
|
| 1320 |
+
int numReplaysDone,
|
| 1321 |
+
void *customData);
|
| 1322 |
+
|
| 1323 |
+
/**
|
| 1324 |
+
* \brief Subscribe to kernel replay updates.
|
| 1325 |
+
*
|
| 1326 |
+
* When subscribed, the function pointer passed in will be called each time a
|
| 1327 |
+
* kernel run is finished during kernel replay. Previously subscribed function
|
| 1328 |
+
* pointer will be replaced. Passing in NULL as the function pointer unsubscribes
|
| 1329 |
+
* the update.
|
| 1330 |
+
*
|
| 1331 |
+
* \param updateFunc The update function pointer
|
| 1332 |
+
* \param customData Pointer to any custom data
|
| 1333 |
+
* \retval CUPTI_SUCCESS
|
| 1334 |
+
*/
|
| 1335 |
+
CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate(CUpti_KernelReplayUpdateFunc updateFunc, void *customData);
|
| 1336 |
+
|
| 1337 |
+
/** @} */ /* END CUPTI_EVENT_API */
|
| 1338 |
+
|
| 1339 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 1340 |
+
#pragma GCC visibility pop
|
| 1341 |
+
#endif
|
| 1342 |
+
|
| 1343 |
+
#if defined(__cplusplus)
|
| 1344 |
+
}
|
| 1345 |
+
#endif
|
| 1346 |
+
|
| 1347 |
+
#endif /*_CUPTI_EVENTS_H_*/
|
| 1348 |
+
|
| 1349 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_pcsampling.h
ADDED
|
@@ -0,0 +1,936 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2020-2022 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(_CUPTI_PCSAMPLING_H_)
|
| 51 |
+
#define _CUPTI_PCSAMPLING_H_
|
| 52 |
+
|
| 53 |
+
#include <cuda.h>
|
| 54 |
+
#include <stdint.h>
|
| 55 |
+
#include <stddef.h>
|
| 56 |
+
#include "cupti_result.h"
|
| 57 |
+
#include "cupti_common.h"
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
#if defined(__cplusplus)
|
| 61 |
+
extern "C" {
|
| 62 |
+
#endif
|
| 63 |
+
|
| 64 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 65 |
+
#pragma GCC visibility push(default)
|
| 66 |
+
#endif
|
| 67 |
+
|
| 68 |
+
/**
|
| 69 |
+
* \defgroup CUPTI_PCSAMPLING_API CUPTI PC Sampling API
|
| 70 |
+
* Functions, types, and enums that implement the CUPTI PC Sampling API.
|
| 71 |
+
* @{
|
| 72 |
+
*/
|
| 73 |
+
|
| 74 |
+
#ifndef CUPTI_PCSAMPLING_STRUCT_SIZE
|
| 75 |
+
#define CUPTI_PCSAMPLING_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
|
| 76 |
+
#endif
|
| 77 |
+
|
| 78 |
+
#ifndef CUPTI_STALL_REASON_STRING_SIZE
|
| 79 |
+
#define CUPTI_STALL_REASON_STRING_SIZE 128
|
| 80 |
+
#endif
|
| 81 |
+
|
| 82 |
+
/**
|
| 83 |
+
* \brief PC Sampling collection mode
|
| 84 |
+
*/
|
| 85 |
+
typedef enum
|
| 86 |
+
{
|
| 87 |
+
/**
|
| 88 |
+
* INVALID Value
|
| 89 |
+
*/
|
| 90 |
+
CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID = 0,
|
| 91 |
+
/**
|
| 92 |
+
* Continuous mode. Kernels are not serialized in this mode.
|
| 93 |
+
*/
|
| 94 |
+
CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS = 1,
|
| 95 |
+
/**
|
| 96 |
+
* Serialized mode. Kernels are serialized in this mode.
|
| 97 |
+
*/
|
| 98 |
+
CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED = 2,
|
| 99 |
+
} CUpti_PCSamplingCollectionMode;
|
| 100 |
+
|
| 101 |
+
/**
|
| 102 |
+
* \brief PC Sampling stall reasons
|
| 103 |
+
*/
|
| 104 |
+
typedef struct PACKED_ALIGNMENT
|
| 105 |
+
{
|
| 106 |
+
/**
|
| 107 |
+
* [r] Collected stall reason index
|
| 108 |
+
*/
|
| 109 |
+
uint32_t pcSamplingStallReasonIndex;
|
| 110 |
+
/**
|
| 111 |
+
* [r] Number of times the PC was sampled with the stallReason.
|
| 112 |
+
*/
|
| 113 |
+
uint32_t samples;
|
| 114 |
+
} CUpti_PCSamplingStallReason;
|
| 115 |
+
|
| 116 |
+
/**
|
| 117 |
+
* \brief PC Sampling data
|
| 118 |
+
*/
|
| 119 |
+
typedef struct PACKED_ALIGNMENT
|
| 120 |
+
{
|
| 121 |
+
/**
|
| 122 |
+
* [w] Size of the data structure.
|
| 123 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 124 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 125 |
+
*/
|
| 126 |
+
size_t size;
|
| 127 |
+
/**
|
| 128 |
+
* [r] Unique cubin id
|
| 129 |
+
*/
|
| 130 |
+
uint64_t cubinCrc;
|
| 131 |
+
/**
|
| 132 |
+
* [r] PC offset
|
| 133 |
+
*/
|
| 134 |
+
uint64_t pcOffset;
|
| 135 |
+
/**
|
| 136 |
+
* The function's unique symbol index in the module.
|
| 137 |
+
*/
|
| 138 |
+
uint32_t functionIndex;
|
| 139 |
+
/**
|
| 140 |
+
* Padding
|
| 141 |
+
*/
|
| 142 |
+
uint32_t pad;
|
| 143 |
+
/**
|
| 144 |
+
* [r] The function name. This name string might be shared across all the records
|
| 145 |
+
* including records from activity APIs representing the same function, and so it should not be
|
| 146 |
+
* modified or freed until post processing of all the records is done. Once done, it is the user's responsibility to
|
| 147 |
+
* free the memory using free() function.
|
| 148 |
+
*/
|
| 149 |
+
char* functionName;
|
| 150 |
+
/**
|
| 151 |
+
* [r] Collected stall reason count
|
| 152 |
+
*/
|
| 153 |
+
size_t stallReasonCount;
|
| 154 |
+
/**
|
| 155 |
+
* [r] Stall reason id
|
| 156 |
+
* Total samples
|
| 157 |
+
*/
|
| 158 |
+
CUpti_PCSamplingStallReason *stallReason;
|
| 159 |
+
/**
|
| 160 |
+
* The correlation ID of the kernel to which this result is associated. Only valid for serialized mode of pc sampling collection.
|
| 161 |
+
* For continuous mode of collection the correlationId will be set to 0.
|
| 162 |
+
*/
|
| 163 |
+
uint32_t correlationId;
|
| 164 |
+
} CUpti_PCSamplingPCData;
|
| 165 |
+
|
| 166 |
+
/**
|
| 167 |
+
* \brief PC Sampling output data format
|
| 168 |
+
*/
|
| 169 |
+
typedef enum
|
| 170 |
+
{
|
| 171 |
+
CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_INVALID = 0,
|
| 172 |
+
/**
|
| 173 |
+
* HW buffer data will be parsed during collection of data
|
| 174 |
+
*/
|
| 175 |
+
CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED = 1,
|
| 176 |
+
} CUpti_PCSamplingOutputDataFormat;
|
| 177 |
+
|
| 178 |
+
/**
|
| 179 |
+
* \brief Collected PC Sampling data
|
| 180 |
+
*
|
| 181 |
+
*/
|
| 182 |
+
typedef struct PACKED_ALIGNMENT
|
| 183 |
+
{
|
| 184 |
+
/**
|
| 185 |
+
* [w] Size of the data structure.
|
| 186 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 187 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 188 |
+
*/
|
| 189 |
+
size_t size;
|
| 190 |
+
/**
|
| 191 |
+
* [w] Number of PCs to be collected
|
| 192 |
+
*/
|
| 193 |
+
size_t collectNumPcs;
|
| 194 |
+
/**
|
| 195 |
+
* [r] Number of samples collected across all PCs.
|
| 196 |
+
* It includes samples for user modules, samples for non-user kernels and dropped samples.
|
| 197 |
+
* It includes counts for all non selected stall reasons.
|
| 198 |
+
* CUPTI does not provide PC records for non-user kernels.
|
| 199 |
+
* CUPTI does not provide PC records for instructions for which all selected stall reason metrics counts are zero.
|
| 200 |
+
*/
|
| 201 |
+
uint64_t totalSamples;
|
| 202 |
+
/**
|
| 203 |
+
* [r] Number of samples that were dropped by hardware due to backpressure/overflow.
|
| 204 |
+
*/
|
| 205 |
+
uint64_t droppedSamples;
|
| 206 |
+
/**
|
| 207 |
+
* [r] Number of PCs collected
|
| 208 |
+
*/
|
| 209 |
+
size_t totalNumPcs;
|
| 210 |
+
/**
|
| 211 |
+
* [r] Number of PCs available for collection
|
| 212 |
+
*/
|
| 213 |
+
size_t remainingNumPcs;
|
| 214 |
+
/**
|
| 215 |
+
* [r] Unique identifier for each range.
|
| 216 |
+
* Data collected across multiple ranges in multiple buffers can be identified using range id.
|
| 217 |
+
*/
|
| 218 |
+
uint64_t rangeId;
|
| 219 |
+
/**
|
| 220 |
+
* [r] Profiled PC data
|
| 221 |
+
* This data struct should have enough memory to collect the number of PCs mentioned in \ref collectNumPcs
|
| 222 |
+
*/
|
| 223 |
+
CUpti_PCSamplingPCData *pPcData;
|
| 224 |
+
/**
|
| 225 |
+
* [r] Number of samples collected across all non user kernels PCs.
|
| 226 |
+
* It includes samples for non-user kernels.
|
| 227 |
+
* It includes counts for all non selected stall reasons as well.
|
| 228 |
+
* CUPTI does not provide PC records for non-user kernels.
|
| 229 |
+
*/
|
| 230 |
+
uint64_t nonUsrKernelsTotalSamples;
|
| 231 |
+
|
| 232 |
+
/**
|
| 233 |
+
* [r] Status of the hardware buffer.
|
| 234 |
+
* CUPTI returns the error code CUPTI_ERROR_OUT_OF_MEMORY when hardware buffer is full.
|
| 235 |
+
* When hardware buffer is full, user will get pc data as 0. To mitigate this issue, one or more of the below options can be tried:
|
| 236 |
+
* 1. Increase the hardware buffer size using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
|
| 237 |
+
* 2. Decrease the thread sleep span using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN
|
| 238 |
+
* 3. Decrease the sampling frequency using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
|
| 239 |
+
*/
|
| 240 |
+
uint8_t hardwareBufferFull;
|
| 241 |
+
} CUpti_PCSamplingData;
|
| 242 |
+
|
| 243 |
+
/**
|
| 244 |
+
* \brief PC Sampling configuration attributes
|
| 245 |
+
*
|
| 246 |
+
* PC Sampling configuration attribute types. These attributes can be read
|
| 247 |
+
* using \ref cuptiPCSamplingGetConfigurationAttribute and can be written
|
| 248 |
+
* using \ref cuptiPCSamplingSetConfigurationAttribute. Attributes marked
|
| 249 |
+
* [r] can only be read using \ref cuptiPCSamplingGetConfigurationAttribute
|
| 250 |
+
* [w] can only be written using \ref cuptiPCSamplingSetConfigurationAttribute
|
| 251 |
+
* [rw] can be read using \ref cuptiPCSamplingGetConfigurationAttribute and
|
| 252 |
+
* written using \ref cuptiPCSamplingSetConfigurationAttribute
|
| 253 |
+
*/
|
| 254 |
+
typedef enum
|
| 255 |
+
{
|
| 256 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_INVALID = 0,
|
| 257 |
+
/**
|
| 258 |
+
* [rw] Sampling period for PC Sampling.
|
| 259 |
+
* DEFAULT - CUPTI defined value based on number of SMs
|
| 260 |
+
* Valid values for the sampling
|
| 261 |
+
* periods are between 5 to 31 both inclusive. This will set the
|
| 262 |
+
* sampling period to (2^samplingPeriod) cycles.
|
| 263 |
+
* For e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31
|
| 264 |
+
* Value is a uint32_t
|
| 265 |
+
*/
|
| 266 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD = 1,
|
| 267 |
+
/**
|
| 268 |
+
* [w] Number of stall reasons to collect.
|
| 269 |
+
* DEFAULT - All stall reasons will be collected
|
| 270 |
+
* Value is a size_t
|
| 271 |
+
* [w] Stall reasons to collect
|
| 272 |
+
* DEFAULT - All stall reasons will be collected
|
| 273 |
+
* Input value should be a pointer pointing to array of stall reason indexes
|
| 274 |
+
* containing all the stall reason indexes to collect.
|
| 275 |
+
*/
|
| 276 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON = 2,
|
| 277 |
+
/**
|
| 278 |
+
* [rw] Size of SW buffer for raw PC counter data downloaded from HW buffer
|
| 279 |
+
* DEFAULT - 1 MB, which can accommodate approximately 5500 PCs
|
| 280 |
+
* with all stall reasons
|
| 281 |
+
* Approximately it takes 16 Bytes (and some fixed size memory)
|
| 282 |
+
* to accommodate one PC with one stall reason
|
| 283 |
+
* For e.g. 1 PC with 1 stall reason = 32 Bytes
|
| 284 |
+
* 1 PC with 2 stall reason = 48 Bytes
|
| 285 |
+
* 1 PC with 4 stall reason = 96 Bytes
|
| 286 |
+
* Value is a size_t
|
| 287 |
+
*/
|
| 288 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE = 3,
|
| 289 |
+
/**
|
| 290 |
+
* [rw] Size of HW buffer in bytes
|
| 291 |
+
* DEFAULT - 512 MB
|
| 292 |
+
* If the sampling period is too low, the HW buffer can overflow
|
| 293 |
+
* and drop PC data
|
| 294 |
+
* Value is a size_t
|
| 295 |
+
*/
|
| 296 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE = 4,
|
| 297 |
+
/**
|
| 298 |
+
* [rw] PC Sampling collection mode
|
| 299 |
+
* DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS
|
| 300 |
+
* Input value should be of type \ref CUpti_PCSamplingCollectionMode.
|
| 301 |
+
*/
|
| 302 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE = 5,
|
| 303 |
+
/**
|
| 304 |
+
* [rw] Control over PC Sampling data collection range
|
| 305 |
+
* Default - 0
|
| 306 |
+
* 1 - Allows user to start and stop PC Sampling using APIs -
|
| 307 |
+
* \ref cuptiPCSamplingStart() - Start PC Sampling
|
| 308 |
+
* \ref cuptiPCSamplingStop() - Stop PC Sampling
|
| 309 |
+
* Value is a uint32_t
|
| 310 |
+
*/
|
| 311 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL = 6,
|
| 312 |
+
/**
|
| 313 |
+
* [w] Value for output data format
|
| 314 |
+
* Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED
|
| 315 |
+
* Input value should be of type \ref CUpti_PCSamplingOutputDataFormat.
|
| 316 |
+
*/
|
| 317 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT = 7,
|
| 318 |
+
/**
|
| 319 |
+
* [w] Data buffer to hold collected PC Sampling data PARSED_DATA
|
| 320 |
+
* Default - none.
|
| 321 |
+
* Buffer type is void * which can point to PARSED_DATA
|
| 322 |
+
* Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
|
| 323 |
+
*/
|
| 324 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER = 8,
|
| 325 |
+
/**
|
| 326 |
+
* [rw] Control sleep time of the worker threads created by CUPTI for various PC sampling operations.
|
| 327 |
+
* CUPTI creates multiple worker threads to offload certain operations to these threads. This includes decoding of HW data to
|
| 328 |
+
* the CUPTI PC sampling data and correlating PC data to SASS instructions. CUPTI wakes up these threads periodically.
|
| 329 |
+
* Default - 100 milliseconds.
|
| 330 |
+
* Value is a uint32_t
|
| 331 |
+
*/
|
| 332 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN = 9,
|
| 333 |
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_FORCE_INT = 0x7fffffff,
|
| 334 |
+
} CUpti_PCSamplingConfigurationAttributeType;
|
| 335 |
+
|
| 336 |
+
/**
|
| 337 |
+
* \brief PC sampling configuration information structure
|
| 338 |
+
*
|
| 339 |
+
* This structure provides \ref CUpti_PCSamplingConfigurationAttributeType which can be configured
|
| 340 |
+
* or queried for PC sampling configuration
|
| 341 |
+
*/
|
| 342 |
+
typedef struct
|
| 343 |
+
{
|
| 344 |
+
/**
|
| 345 |
+
* Refer \ref CUpti_PCSamplingConfigurationAttributeType for all supported attribute types
|
| 346 |
+
*/
|
| 347 |
+
CUpti_PCSamplingConfigurationAttributeType attributeType;
|
| 348 |
+
/*
|
| 349 |
+
* Configure or query status for \p attributeType
|
| 350 |
+
* CUPTI_SUCCESS for valid \p attributeType and \p attributeData
|
| 351 |
+
* CUPTI_ERROR_INVALID_OPERATION if \p attributeData is not valid
|
| 352 |
+
* CUPTI_ERROR_INVALID_PARAMETER if \p attributeType is not valid
|
| 353 |
+
*/
|
| 354 |
+
CUptiResult attributeStatus;
|
| 355 |
+
union
|
| 356 |
+
{
|
| 357 |
+
/**
|
| 358 |
+
* Invalid Value
|
| 359 |
+
*/
|
| 360 |
+
struct
|
| 361 |
+
{
|
| 362 |
+
uint64_t data[3];
|
| 363 |
+
} invalidData;
|
| 364 |
+
/**
|
| 365 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
|
| 366 |
+
*/
|
| 367 |
+
struct
|
| 368 |
+
{
|
| 369 |
+
uint32_t samplingPeriod;
|
| 370 |
+
} samplingPeriodData;
|
| 371 |
+
/**
|
| 372 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON
|
| 373 |
+
*/
|
| 374 |
+
struct
|
| 375 |
+
{
|
| 376 |
+
size_t stallReasonCount;
|
| 377 |
+
uint32_t *pStallReasonIndex;
|
| 378 |
+
} stallReasonData;
|
| 379 |
+
/**
|
| 380 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE
|
| 381 |
+
*/
|
| 382 |
+
struct
|
| 383 |
+
{
|
| 384 |
+
size_t scratchBufferSize;
|
| 385 |
+
} scratchBufferSizeData;
|
| 386 |
+
/**
|
| 387 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
|
| 388 |
+
*/
|
| 389 |
+
struct
|
| 390 |
+
{
|
| 391 |
+
size_t hardwareBufferSize;
|
| 392 |
+
} hardwareBufferSizeData;
|
| 393 |
+
/**
|
| 394 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE
|
| 395 |
+
*/
|
| 396 |
+
struct
|
| 397 |
+
{
|
| 398 |
+
CUpti_PCSamplingCollectionMode collectionMode;
|
| 399 |
+
} collectionModeData;
|
| 400 |
+
/**
|
| 401 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
|
| 402 |
+
*/
|
| 403 |
+
struct
|
| 404 |
+
{
|
| 405 |
+
uint32_t enableStartStopControl;
|
| 406 |
+
} enableStartStopControlData;
|
| 407 |
+
/**
|
| 408 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT
|
| 409 |
+
*/
|
| 410 |
+
struct
|
| 411 |
+
{
|
| 412 |
+
CUpti_PCSamplingOutputDataFormat outputDataFormat;
|
| 413 |
+
} outputDataFormatData;
|
| 414 |
+
/**
|
| 415 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER
|
| 416 |
+
*/
|
| 417 |
+
struct
|
| 418 |
+
{
|
| 419 |
+
void *samplingDataBuffer;
|
| 420 |
+
} samplingDataBufferData;
|
| 421 |
+
/**
|
| 422 |
+
* Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN
|
| 423 |
+
*/
|
| 424 |
+
struct
|
| 425 |
+
{
|
| 426 |
+
uint32_t workerThreadPeriodicSleepSpan;
|
| 427 |
+
} workerThreadPeriodicSleepSpanData;
|
| 428 |
+
|
| 429 |
+
} attributeData;
|
| 430 |
+
} CUpti_PCSamplingConfigurationInfo;
|
| 431 |
+
|
| 432 |
+
/**
|
| 433 |
+
* \brief PC sampling configuration structure
|
| 434 |
+
*
|
| 435 |
+
* This structure configures PC sampling using \ref cuptiPCSamplingSetConfigurationAttribute
|
| 436 |
+
* and queries PC sampling default configuration using \ref cuptiPCSamplingGetConfigurationAttribute
|
| 437 |
+
*/
|
| 438 |
+
typedef struct
|
| 439 |
+
{
|
| 440 |
+
/**
|
| 441 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingConfigurationInfoParamsSize
|
| 442 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 443 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 444 |
+
*/
|
| 445 |
+
size_t size;
|
| 446 |
+
/**
|
| 447 |
+
* [w] Assign to NULL
|
| 448 |
+
*/
|
| 449 |
+
void* pPriv;
|
| 450 |
+
/**
|
| 451 |
+
* [w] CUcontext
|
| 452 |
+
*/
|
| 453 |
+
CUcontext ctx;
|
| 454 |
+
/**
|
| 455 |
+
* [w] Number of attributes to configure using \ref cuptiPCSamplingSetConfigurationAttribute or query
|
| 456 |
+
* using \ref cuptiPCSamplingGetConfigurationAttribute
|
| 457 |
+
*/
|
| 458 |
+
size_t numAttributes;
|
| 459 |
+
/**
|
| 460 |
+
* Refer \ref CUpti_PCSamplingConfigurationInfo
|
| 461 |
+
*/
|
| 462 |
+
CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
|
| 463 |
+
} CUpti_PCSamplingConfigurationInfoParams;
|
| 464 |
+
#define CUpti_PCSamplingConfigurationInfoParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingConfigurationInfoParams,pPCSamplingConfigurationInfo)
|
| 465 |
+
|
| 466 |
+
/**
|
| 467 |
+
* \brief Write PC Sampling configuration attribute.
|
| 468 |
+
*
|
| 469 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
|
| 470 |
+
* containing PC sampling configuration.
|
| 471 |
+
*
|
| 472 |
+
* \retval CUPTI_SUCCESS
|
| 473 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
|
| 474 |
+
* some invalid \p attrib.
|
| 475 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if attribute \p value is not valid
|
| 476 |
+
* or any \p pParams is not valid
|
| 477 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 478 |
+
* does not support the API
|
| 479 |
+
*/
|
| 480 |
+
CUptiResult CUPTIAPI cuptiPCSamplingSetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);
|
| 481 |
+
|
| 482 |
+
/**
|
| 483 |
+
* \brief Read PC Sampling configuration attribute.
|
| 484 |
+
*
|
| 485 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
|
| 486 |
+
* containing PC sampling configuration.
|
| 487 |
+
*
|
| 488 |
+
* \retval CUPTI_SUCCESS
|
| 489 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
|
| 490 |
+
* some invalid attribute.
|
| 491 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p attrib is not valid
|
| 492 |
+
* or any \p pParams is not valid
|
| 493 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT indicates that
|
| 494 |
+
* the \p value buffer is too small to hold the attribute value
|
| 495 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 496 |
+
* does not support the API
|
| 497 |
+
*/
|
| 498 |
+
CUptiResult CUPTIAPI cuptiPCSamplingGetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);
|
| 499 |
+
|
| 500 |
+
/**
|
| 501 |
+
* \brief Params for cuptiPCSamplingGetData
|
| 502 |
+
*/
|
| 503 |
+
typedef struct
|
| 504 |
+
{
|
| 505 |
+
/**
|
| 506 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingGetDataParamsSize
|
| 507 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 508 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 509 |
+
*/
|
| 510 |
+
size_t size;
|
| 511 |
+
/**
|
| 512 |
+
* [w] Assign to NULL
|
| 513 |
+
*/
|
| 514 |
+
void* pPriv;
|
| 515 |
+
/**
|
| 516 |
+
* [w] CUcontext
|
| 517 |
+
*/
|
| 518 |
+
CUcontext ctx;
|
| 519 |
+
/**
|
| 520 |
+
* \param pcSamplingData Data buffer to hold collected PC Sampling data PARSED_DATA
|
| 521 |
+
* Buffer type is void * which can point to PARSED_DATA
|
| 522 |
+
* Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
|
| 523 |
+
*/
|
| 524 |
+
void *pcSamplingData;
|
| 525 |
+
} CUpti_PCSamplingGetDataParams;
|
| 526 |
+
#define CUpti_PCSamplingGetDataParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetDataParams, pcSamplingData)
|
| 527 |
+
/**
|
| 528 |
+
* \brief Flush GPU PC sampling data periodically.
|
| 529 |
+
*
|
| 530 |
+
* Flushing of GPU PC Sampling data is required at the following points to maintain uniqueness of PCs:
|
| 531 |
+
* For \ref CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, after every module load-unload-load
|
| 532 |
+
* For \ref CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends
|
| 533 |
+
* If configuration option \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
|
| 534 |
+
* is enabled, then after every range end i.e. cuptiPCSamplingStop()
|
| 535 |
+
*
|
| 536 |
+
* If application is profiled in \ref CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, with disabled
|
| 537 |
+
* \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL, and there is no module unload,
|
| 538 |
+
* user can collect data in two ways:
|
| 539 |
+
* Use cuptiPCSamplingGetData() API periodically
|
| 540 |
+
* Use cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from sampling
|
| 541 |
+
* data buffer passed during configuration.
|
| 542 |
+
* Note: In case cuptiPCSamplingGetData() API is not called periodically, then sampling data buffer
|
| 543 |
+
* passed during configuration should be large enough to hold all PCs data.
|
| 544 |
+
* cuptiPCSamplingGetData() API never does device synchronization.
|
| 545 |
+
* It is possible that when the API is called there is some unconsumed data from the HW buffer. In this case
|
| 546 |
+
* CUPTI provides only the data available with it at that moment.
|
| 547 |
+
*
|
| 548 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingGetDataParams
|
| 549 |
+
*
|
| 550 |
+
* \retval CUPTI_SUCCESS
|
| 551 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if this API is called without
|
| 552 |
+
* enabling PC sampling.
|
| 553 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 554 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 555 |
+
* does not support the API
|
| 556 |
+
* \retval CUPTI_ERROR_OUT_OF_MEMORY indicates that the HW buffer is full
|
| 557 |
+
*/
|
| 558 |
+
CUptiResult CUPTIAPI cuptiPCSamplingGetData(CUpti_PCSamplingGetDataParams *pParams);
|
| 559 |
+
|
| 560 |
+
/**
|
| 561 |
+
* \brief Params for cuptiPCSamplingEnable
|
| 562 |
+
*/
|
| 563 |
+
typedef struct
|
| 564 |
+
{
|
| 565 |
+
/**
|
| 566 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingEnableParamsSize
|
| 567 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 568 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 569 |
+
*/
|
| 570 |
+
size_t size;
|
| 571 |
+
/**
|
| 572 |
+
* [w] Assign to NULL
|
| 573 |
+
*/
|
| 574 |
+
void* pPriv;
|
| 575 |
+
/**
|
| 576 |
+
* [w] CUcontext
|
| 577 |
+
*/
|
| 578 |
+
CUcontext ctx;
|
| 579 |
+
} CUpti_PCSamplingEnableParams;
|
| 580 |
+
#define CUpti_PCSamplingEnableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingEnableParams, ctx)
|
| 581 |
+
|
| 582 |
+
/**
|
| 583 |
+
* \brief Enable PC sampling.
|
| 584 |
+
*
|
| 585 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingEnableParams
|
| 586 |
+
*
|
| 587 |
+
* \retval CUPTI_SUCCESS
|
| 588 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 589 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 590 |
+
* does not support the API
|
| 591 |
+
*/
|
| 592 |
+
CUptiResult CUPTIAPI cuptiPCSamplingEnable(CUpti_PCSamplingEnableParams *pParams);
|
| 593 |
+
|
| 594 |
+
/**
|
| 595 |
+
* \brief Params for cuptiPCSamplingDisable
|
| 596 |
+
*/
|
| 597 |
+
typedef struct
|
| 598 |
+
{
|
| 599 |
+
/**
|
| 600 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize
|
| 601 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 602 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 603 |
+
*/
|
| 604 |
+
size_t size;
|
| 605 |
+
/**
|
| 606 |
+
* [w] Assign to NULL
|
| 607 |
+
*/
|
| 608 |
+
void* pPriv;
|
| 609 |
+
/**
|
| 610 |
+
* [w] CUcontext
|
| 611 |
+
*/
|
| 612 |
+
CUcontext ctx;
|
| 613 |
+
} CUpti_PCSamplingDisableParams;
|
| 614 |
+
#define CUpti_PCSamplingDisableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingDisableParams, ctx)
|
| 615 |
+
|
| 616 |
+
/**
|
| 617 |
+
* \brief Disable PC sampling.
|
| 618 |
+
*
|
| 619 |
+
* For application which doesn't destroy the CUDA context explicitly,
|
| 620 |
+
* this API does the PC Sampling tear-down, joins threads and copies PC records in the buffer provided
|
| 621 |
+
* during the PC sampling configuration. PC records which can't be accommodated in the buffer are discarded.
|
| 622 |
+
*
|
| 623 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingDisableParams
|
| 624 |
+
*
|
| 625 |
+
* \retval CUPTI_SUCCESS
|
| 626 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 627 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 628 |
+
* does not support the API
|
| 629 |
+
*/
|
| 630 |
+
CUptiResult CUPTIAPI cuptiPCSamplingDisable(CUpti_PCSamplingDisableParams *pParams);
|
| 631 |
+
|
| 632 |
+
/**
|
| 633 |
+
* \brief Params for cuptiPCSamplingStart
|
| 634 |
+
*/
|
| 635 |
+
typedef struct
|
| 636 |
+
{
|
| 637 |
+
/**
|
| 638 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingStartParamsSize
|
| 639 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 640 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 641 |
+
*/
|
| 642 |
+
size_t size;
|
| 643 |
+
/**
|
| 644 |
+
* [w] Assign to NULL
|
| 645 |
+
*/
|
| 646 |
+
void* pPriv;
|
| 647 |
+
/**
|
| 648 |
+
* [w] CUcontext
|
| 649 |
+
*/
|
| 650 |
+
CUcontext ctx;
|
| 651 |
+
} CUpti_PCSamplingStartParams;
|
| 652 |
+
#define CUpti_PCSamplingStartParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStartParams, ctx)
|
| 653 |
+
|
| 654 |
+
/**
|
| 655 |
+
* \brief Start PC sampling.
|
| 656 |
+
*
|
| 657 |
+
* User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
|
| 658 |
+
* This API can be used to mark starting of range. Set configuration option
|
| 659 |
+
* \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
|
| 660 |
+
*
|
| 661 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingStartParams
|
| 662 |
+
*
|
| 663 |
+
* \retval CUPTI_SUCCESS
|
| 664 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
|
| 665 |
+
* incorrect PC Sampling configuration.
|
| 666 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 667 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 668 |
+
* does not support the API
|
| 669 |
+
*/
|
| 670 |
+
CUptiResult CUPTIAPI cuptiPCSamplingStart(CUpti_PCSamplingStartParams *pParams);
|
| 671 |
+
|
| 672 |
+
/**
|
| 673 |
+
* \brief Params for cuptiPCSamplingStop
|
| 674 |
+
*/
|
| 675 |
+
typedef struct
|
| 676 |
+
{
|
| 677 |
+
/**
|
| 678 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingStopParamsSize
|
| 679 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 680 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 681 |
+
*/
|
| 682 |
+
size_t size;
|
| 683 |
+
/**
|
| 684 |
+
* [w] Assign to NULL
|
| 685 |
+
*/
|
| 686 |
+
void* pPriv;
|
| 687 |
+
/**
|
| 688 |
+
* [w] CUcontext
|
| 689 |
+
*/
|
| 690 |
+
CUcontext ctx;
|
| 691 |
+
} CUpti_PCSamplingStopParams;
|
| 692 |
+
#define CUpti_PCSamplingStopParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStopParams, ctx)
|
| 693 |
+
|
| 694 |
+
/**
|
| 695 |
+
* \brief Stop PC sampling.
|
| 696 |
+
*
|
| 697 |
+
* User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
|
| 698 |
+
* This API can be used to mark end of range. Set configuration option
|
| 699 |
+
* \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
|
| 700 |
+
*
|
| 701 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingStopParams
|
| 702 |
+
*
|
| 703 |
+
* \retval CUPTI_SUCCESS
|
| 704 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
|
| 705 |
+
* incorrect PC Sampling configuration.
|
| 706 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 707 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 708 |
+
* does not support the API
|
| 709 |
+
*/
|
| 710 |
+
CUptiResult CUPTIAPI cuptiPCSamplingStop(CUpti_PCSamplingStopParams *pParams);
|
| 711 |
+
|
| 712 |
+
/**
|
| 713 |
+
* \brief Params for cuptiPCSamplingGetNumStallReasons
|
| 714 |
+
*/
|
| 715 |
+
typedef struct
|
| 716 |
+
{
|
| 717 |
+
/**
|
| 718 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize
|
| 719 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 720 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 721 |
+
*/
|
| 722 |
+
size_t size;
|
| 723 |
+
/**
|
| 724 |
+
* [w] Assign to NULL
|
| 725 |
+
*/
|
| 726 |
+
void* pPriv;
|
| 727 |
+
/**
|
| 728 |
+
* [w] CUcontext
|
| 729 |
+
*/
|
| 730 |
+
CUcontext ctx;
|
| 731 |
+
/**
|
| 732 |
+
* [r] Number of stall reasons
|
| 733 |
+
*/
|
| 734 |
+
size_t *numStallReasons;
|
| 735 |
+
} CUpti_PCSamplingGetNumStallReasonsParams;
|
| 736 |
+
#define CUpti_PCSamplingGetNumStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetNumStallReasonsParams, numStallReasons)
|
| 737 |
+
|
| 738 |
+
/**
|
| 739 |
+
* \brief Get PC sampling stall reason count.
|
| 740 |
+
*
|
| 741 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingGetNumStallReasonsParams
|
| 742 |
+
*
|
| 743 |
+
* \retval CUPTI_SUCCESS
|
| 744 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 745 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 746 |
+
* does not support the API
|
| 747 |
+
*/
|
| 748 |
+
CUptiResult CUPTIAPI cuptiPCSamplingGetNumStallReasons(CUpti_PCSamplingGetNumStallReasonsParams *pParams);
|
| 749 |
+
|
| 750 |
+
/**
|
| 751 |
+
* \brief Params for cuptiPCSamplingGetStallReasons
|
| 752 |
+
*/
|
| 753 |
+
typedef struct
|
| 754 |
+
{
|
| 755 |
+
/**
|
| 756 |
+
* [w] Size of the data structure i.e. CUpti_PCSamplingGetStallReasonsParamsSize
|
| 757 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 758 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 759 |
+
*/
|
| 760 |
+
size_t size;
|
| 761 |
+
/**
|
| 762 |
+
* [w] Assign to NULL
|
| 763 |
+
*/
|
| 764 |
+
void* pPriv;
|
| 765 |
+
/**
|
| 766 |
+
* [w] CUcontext
|
| 767 |
+
*/
|
| 768 |
+
CUcontext ctx;
|
| 769 |
+
/**
|
| 770 |
+
* [w] Number of stall reasons
|
| 771 |
+
*/
|
| 772 |
+
size_t numStallReasons;
|
| 773 |
+
/**
|
| 774 |
+
* [r] Stall reason index
|
| 775 |
+
*/
|
| 776 |
+
uint32_t *stallReasonIndex;
|
| 777 |
+
/**
|
| 778 |
+
* [r] Stall reasons name
|
| 779 |
+
*/
|
| 780 |
+
char **stallReasons;
|
| 781 |
+
} CUpti_PCSamplingGetStallReasonsParams;
|
| 782 |
+
#define CUpti_PCSamplingGetStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetStallReasonsParams, stallReasons)
|
| 783 |
+
|
| 784 |
+
/**
|
| 785 |
+
* \brief Get PC sampling stall reasons.
|
| 786 |
+
*
|
| 787 |
+
* \param pParams A pointer to \ref CUpti_PCSamplingGetStallReasonsParams
|
| 788 |
+
*
|
| 789 |
+
* \retval CUPTI_SUCCESS
|
| 790 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
|
| 791 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
|
| 792 |
+
* does not support the API
|
| 793 |
+
*/
|
| 794 |
+
CUptiResult CUPTIAPI cuptiPCSamplingGetStallReasons(CUpti_PCSamplingGetStallReasonsParams *pParams);
|
| 795 |
+
|
| 796 |
+
|
| 797 |
+
/**
|
| 798 |
+
* \brief Params for cuptiGetSassToSourceCorrelation
|
| 799 |
+
*/
|
| 800 |
+
typedef struct CUpti_GetSassToSourceCorrelationParams {
|
| 801 |
+
/**
|
| 802 |
+
* [w] Size of the data structure i.e. CUpti_GetSassToSourceCorrelationParamsSize
|
| 803 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 804 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 805 |
+
*/
|
| 806 |
+
size_t size;
|
| 807 |
+
/**
|
| 808 |
+
* [w] Pointer to cubin binary where function belongs.
|
| 809 |
+
*/
|
| 810 |
+
const void* cubin;
|
| 811 |
+
/**
|
| 812 |
+
* [w] Function name to which PC belongs.
|
| 813 |
+
*/
|
| 814 |
+
const char *functionName;
|
| 815 |
+
/**
|
| 816 |
+
* [w] Size of cubin binary.
|
| 817 |
+
*/
|
| 818 |
+
size_t cubinSize;
|
| 819 |
+
/**
|
| 820 |
+
* [r] Line number in the source code.
|
| 821 |
+
*/
|
| 822 |
+
uint32_t lineNumber;
|
| 823 |
+
/**
|
| 824 |
+
* [w] PC offset
|
| 825 |
+
*/
|
| 826 |
+
uint64_t pcOffset;
|
| 827 |
+
/**
|
| 828 |
+
* [r] Path for the source file.
|
| 829 |
+
*/
|
| 830 |
+
char *fileName;
|
| 831 |
+
/**
|
| 832 |
+
* [r] Path for the directory of source file.
|
| 833 |
+
*/
|
| 834 |
+
char *dirName;
|
| 835 |
+
} CUpti_GetSassToSourceCorrelationParams;
|
| 836 |
+
|
| 837 |
+
#define CUpti_GetSassToSourceCorrelationParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetSassToSourceCorrelationParams, dirName)
|
| 838 |
+
|
| 839 |
+
/**
|
| 840 |
+
* \brief SASS to Source correlation.
|
| 841 |
+
*
|
| 842 |
+
* \param pParams A pointer to \ref CUpti_GetSassToSourceCorrelationParams
|
| 843 |
+
*
|
| 844 |
+
* The user is expected to free the memory allocated for fileName and dirName after use.
|
| 845 |
+
*
|
| 846 |
+
* \retval CUPTI_SUCCESS
|
| 847 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if either of the parameters cubin or functionName
|
| 848 |
+
* is NULL or cubinSize is zero or size field is not set correctly.
|
| 849 |
+
* \retval CUPTI_ERROR_INVALID_MODULE provided cubin is invalid.
|
| 850 |
+
* \retval CUPTI_ERROR_UNKNOWN an internal error occurred.
|
| 851 |
+
* This error code is also used for cases when the function is not present in the module.
|
| 852 |
+
* A better error code will be returned in a future release.
|
| 853 |
+
*/
|
| 854 |
+
CUptiResult CUPTIAPI cuptiGetSassToSourceCorrelation(CUpti_GetSassToSourceCorrelationParams *pParams);
|
| 855 |
+
|
| 856 |
+
/**
|
| 857 |
+
* \brief Params for cuptiGetCubinCrc
|
| 858 |
+
*/
|
| 859 |
+
typedef struct {
|
| 860 |
+
/**
|
| 861 |
+
* [w] Size of configuration structure.
|
| 862 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 863 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 864 |
+
*/
|
| 865 |
+
size_t size;
|
| 866 |
+
/**
|
| 867 |
+
* [w] Size of cubin binary.
|
| 868 |
+
*/
|
| 869 |
+
size_t cubinSize;
|
| 870 |
+
/**
|
| 871 |
+
* [w] Pointer to cubin binary
|
| 872 |
+
*/
|
| 873 |
+
const void* cubin;
|
| 874 |
+
/**
|
| 875 |
+
* [r] Computed CRC will be stored in it.
|
| 876 |
+
*/
|
| 877 |
+
uint64_t cubinCrc;
|
| 878 |
+
} CUpti_GetCubinCrcParams;
|
| 879 |
+
#define CUpti_GetCubinCrcParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetCubinCrcParams, cubinCrc)
|
| 880 |
+
|
| 881 |
+
/**
|
| 882 |
+
* \brief Get the CRC of cubin.
|
| 883 |
+
*
|
| 884 |
+
* This function returns the CRC of provided cubin binary.
|
| 885 |
+
*
|
| 886 |
+
* \param pParams A pointer to \ref CUpti_GetCubinCrcParams
|
| 887 |
+
*
|
| 888 |
+
* \retval CUPTI_SUCCESS
|
| 889 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if parameter cubin is NULL or
|
| 890 |
+
* provided cubinSize is zero or size field is not set.
|
| 891 |
+
*/
|
| 892 |
+
CUptiResult CUPTIAPI cuptiGetCubinCrc(CUpti_GetCubinCrcParams *pParams);
|
| 893 |
+
|
| 894 |
+
/**
|
| 895 |
+
* \brief Function type for callback used by CUPTI to request crc of
|
| 896 |
+
* loaded module.
|
| 897 |
+
*
|
| 898 |
+
* This callback function asks for the crc of the provided module.
|
| 899 |
+
* The provided crc will be stored in PC sampling records i.e. in the field 'cubinCrc' of the PC sampling
|
| 900 |
+
* struct CUpti_PCSamplingPCData. The CRC is used during the offline source correlation to uniquely identify the module.
|
| 901 |
+
*
|
| 902 |
+
* \param cubin The pointer to cubin binary
|
| 903 |
+
* \param cubinSize The size of cubin binary.
|
| 904 |
+
* \param cubinCrc Returns the computed crc of cubin.
|
| 905 |
+
*/
|
| 906 |
+
typedef void (CUPTIAPI *CUpti_ComputeCrcCallbackFunc)(
|
| 907 |
+
const void* cubin,
|
| 908 |
+
size_t cubinSize,
|
| 909 |
+
uint64_t *cubinCrc);
|
| 910 |
+
|
| 911 |
+
/**
|
| 912 |
+
* \brief Register callback function with CUPTI to use
|
| 913 |
+
* your own algorithm to compute cubin crc.
|
| 914 |
+
*
|
| 915 |
+
* This function registers a callback function and it gets called
|
| 916 |
+
* from CUPTI when a CUDA module is loaded.
|
| 917 |
+
*
|
| 918 |
+
* \param funcComputeCubinCrc callback is invoked when a CUDA module
|
| 919 |
+
* is loaded.
|
| 920 |
+
*
|
| 921 |
+
* \retval CUPTI_SUCCESS
|
| 922 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p funcComputeCubinCrc is NULL.
|
| 923 |
+
*/
|
| 924 |
+
CUptiResult CUPTIAPI cuptiRegisterComputeCrcCallback(CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc);
|
| 925 |
+
|
| 926 |
+
/** @} */ /* END CUPTI_PCSAMPLING_API */
|
| 927 |
+
|
| 928 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 929 |
+
#pragma GCC visibility pop
|
| 930 |
+
#endif
|
| 931 |
+
|
| 932 |
+
#if defined(__cplusplus)
|
| 933 |
+
}
|
| 934 |
+
#endif
|
| 935 |
+
|
| 936 |
+
#endif /*_CUPTI_PCSAMPLING_H_*/
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_runtime_cbid.h
ADDED
|
@@ -0,0 +1,504 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
// *************************************************************************
|
| 3 |
+
// Definitions of indices for API functions, unique across entire API
|
| 4 |
+
// *************************************************************************
|
| 5 |
+
|
| 6 |
+
// This file is generated. Any changes you make will be lost during the next clean build.
|
| 7 |
+
// CUDA public interface, for type definitions and cu* function prototypes
|
| 8 |
+
|
| 9 |
+
#if !defined(_CUPTI_RUNTIME_CBID_H)
|
| 10 |
+
#define _CUPTI_RUNTIME_CBID_H
|
| 11 |
+
|
| 12 |
+
typedef enum CUpti_runtime_api_trace_cbid_enum {
|
| 13 |
+
CUPTI_RUNTIME_TRACE_CBID_INVALID = 0,
|
| 14 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDriverGetVersion_v3020 = 1,
|
| 15 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaRuntimeGetVersion_v3020 = 2,
|
| 16 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceCount_v3020 = 3,
|
| 17 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v3020 = 4,
|
| 18 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaChooseDevice_v3020 = 5,
|
| 19 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetChannelDesc_v3020 = 6,
|
| 20 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateChannelDesc_v3020 = 7,
|
| 21 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaConfigureCall_v3020 = 8,
|
| 22 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetupArgument_v3020 = 9,
|
| 23 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 = 10,
|
| 24 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeekAtLastError_v3020 = 11,
|
| 25 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorString_v3020 = 12,
|
| 26 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020 = 13,
|
| 27 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetCacheConfig_v3020 = 14,
|
| 28 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetAttributes_v3020 = 15,
|
| 29 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 = 16,
|
| 30 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 = 17,
|
| 31 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetValidDevices_v3020 = 18,
|
| 32 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDeviceFlags_v3020 = 19,
|
| 33 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020 = 20,
|
| 34 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocPitch_v3020 = 21,
|
| 35 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020 = 22,
|
| 36 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocArray_v3020 = 23,
|
| 37 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeArray_v3020 = 24,
|
| 38 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocHost_v3020 = 25,
|
| 39 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeHost_v3020 = 26,
|
| 40 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostAlloc_v3020 = 27,
|
| 41 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostGetDevicePointer_v3020 = 28,
|
| 42 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostGetFlags_v3020 = 29,
|
| 43 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemGetInfo_v3020 = 30,
|
| 44 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020 = 31,
|
| 45 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_v3020 = 32,
|
| 46 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_v3020 = 33,
|
| 47 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_v3020 = 34,
|
| 48 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_v3020 = 35,
|
| 49 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_v3020 = 36,
|
| 50 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_v3020 = 37,
|
| 51 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_v3020 = 38,
|
| 52 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_v3020 = 39,
|
| 53 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_v3020 = 40,
|
| 54 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020 = 41,
|
| 55 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_v3020 = 42,
|
| 56 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_v3020 = 43,
|
| 57 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_v3020 = 44,
|
| 58 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_v3020 = 45,
|
| 59 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_v3020 = 46,
|
| 60 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_v3020 = 47,
|
| 61 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_v3020 = 48,
|
| 62 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020 = 49,
|
| 63 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_v3020 = 50,
|
| 64 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020 = 51,
|
| 65 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_v3020 = 52,
|
| 66 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolAddress_v3020 = 53,
|
| 67 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolSize_v3020 = 54,
|
| 68 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture_v3020 = 55,
|
| 69 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture2D_v3020 = 56,
|
| 70 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToArray_v3020 = 57,
|
| 71 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUnbindTexture_v3020 = 58,
|
| 72 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureAlignmentOffset_v3020 = 59,
|
| 73 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureReference_v3020 = 60,
|
| 74 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindSurfaceToArray_v3020 = 61,
|
| 75 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceReference_v3020 = 62,
|
| 76 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLSetGLDevice_v3020 = 63,
|
| 77 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLRegisterBufferObject_v3020 = 64,
|
| 78 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObject_v3020 = 65,
|
| 79 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObject_v3020 = 66,
|
| 80 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLUnregisterBufferObject_v3020 = 67,
|
| 81 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLSetBufferObjectMapFlags_v3020 = 68,
|
| 82 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObjectAsync_v3020 = 69,
|
| 83 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObjectAsync_v3020 = 70,
|
| 84 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWGLGetDevice_v3020 = 71,
|
| 85 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterImage_v3020 = 72,
|
| 86 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterBuffer_v3020 = 73,
|
| 87 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnregisterResource_v3020 = 74,
|
| 88 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceSetMapFlags_v3020 = 75,
|
| 89 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsMapResources_v3020 = 76,
|
| 90 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnmapResources_v3020 = 77,
|
| 91 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedPointer_v3020 = 78,
|
| 92 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsSubResourceGetMappedArray_v3020 = 79,
|
| 93 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUGetDevice_v3020 = 80,
|
| 94 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUSetVDPAUDevice_v3020 = 81,
|
| 95 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterVideoSurface_v3020 = 82,
|
| 96 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterOutputSurface_v3020 = 83,
|
| 97 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevice_v3020 = 84,
|
| 98 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevices_v3020 = 85,
|
| 99 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11SetDirect3DDevice_v3020 = 86,
|
| 100 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D11RegisterResource_v3020 = 87,
|
| 101 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevice_v3020 = 88,
|
| 102 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevices_v3020 = 89,
|
| 103 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10SetDirect3DDevice_v3020 = 90,
|
| 104 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D10RegisterResource_v3020 = 91,
|
| 105 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10RegisterResource_v3020 = 92,
|
| 106 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnregisterResource_v3020 = 93,
|
| 107 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10MapResources_v3020 = 94,
|
| 108 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnmapResources_v3020 = 95,
|
| 109 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceSetMapFlags_v3020 = 96,
|
| 110 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetSurfaceDimensions_v3020 = 97,
|
| 111 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedArray_v3020 = 98,
|
| 112 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPointer_v3020 = 99,
|
| 113 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedSize_v3020 = 100,
|
| 114 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPitch_v3020 = 101,
|
| 115 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevice_v3020 = 102,
|
| 116 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevices_v3020 = 103,
|
| 117 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9SetDirect3DDevice_v3020 = 104,
|
| 118 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDirect3DDevice_v3020 = 105,
|
| 119 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D9RegisterResource_v3020 = 106,
|
| 120 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterResource_v3020 = 107,
|
| 121 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterResource_v3020 = 108,
|
| 122 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapResources_v3020 = 109,
|
| 123 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapResources_v3020 = 110,
|
| 124 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceSetMapFlags_v3020 = 111,
|
| 125 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetSurfaceDimensions_v3020 = 112,
|
| 126 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedArray_v3020 = 113,
|
| 127 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPointer_v3020 = 114,
|
| 128 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedSize_v3020 = 115,
|
| 129 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPitch_v3020 = 116,
|
| 130 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9Begin_v3020 = 117,
|
| 131 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9End_v3020 = 118,
|
| 132 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterVertexBuffer_v3020 = 119,
|
| 133 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterVertexBuffer_v3020 = 120,
|
| 134 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapVertexBuffer_v3020 = 121,
|
| 135 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapVertexBuffer_v3020 = 122,
|
| 136 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadExit_v3020 = 123,
|
| 137 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForDevice_v3020 = 124,
|
| 138 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForHost_v3020 = 125,
|
| 139 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadSynchronize_v3020 = 126,
|
| 140 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetLimit_v3020 = 127,
|
| 141 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetLimit_v3020 = 128,
|
| 142 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreate_v3020 = 129,
|
| 143 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v3020 = 130,
|
| 144 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020 = 131,
|
| 145 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_v3020 = 132,
|
| 146 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 = 133,
|
| 147 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 = 134,
|
| 148 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 = 135,
|
| 149 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 = 136,
|
| 150 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020 = 137,
|
| 151 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020 = 138,
|
| 152 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v3020 = 139,
|
| 153 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3D_v3020 = 140,
|
| 154 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3DArray_v3020 = 141,
|
| 155 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_v3020 = 142,
|
| 156 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_v3020 = 143,
|
| 157 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_v3020 = 144,
|
| 158 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_v3020 = 145,
|
| 159 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetCacheConfig_v3020 = 146,
|
| 160 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020 = 147,
|
| 161 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDirect3DDevice_v3020 = 148,
|
| 162 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDirect3DDevice_v3020 = 149,
|
| 163 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetCacheConfig_v3020 = 150,
|
| 164 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPointerGetAttributes_v4000 = 151,
|
| 165 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostRegister_v4000 = 152,
|
| 166 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostUnregister_v4000 = 153,
|
| 167 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceCanAccessPeer_v4000 = 154,
|
| 168 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceEnablePeerAccess_v4000 = 155,
|
| 169 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceDisablePeerAccess_v4000 = 156,
|
| 170 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeerRegister_v4000 = 157,
|
| 171 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeerUnregister_v4000 = 158,
|
| 172 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeerGetDevicePointer_v4000 = 159,
|
| 173 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeer_v4000 = 160,
|
| 174 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeerAsync_v4000 = 161,
|
| 175 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_v4000 = 162,
|
| 176 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_v4000 = 163,
|
| 177 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceReset_v3020 = 164,
|
| 178 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020 = 165,
|
| 179 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetLimit_v3020 = 166,
|
| 180 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetLimit_v3020 = 167,
|
| 181 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetCacheConfig_v3020 = 168,
|
| 182 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetCacheConfig_v3020 = 169,
|
| 183 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaProfilerInitialize_v4000 = 170,
|
| 184 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStart_v4000 = 171,
|
| 185 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStop_v4000 = 172,
|
| 186 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetByPCIBusId_v4010 = 173,
|
| 187 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetPCIBusId_v4010 = 174,
|
| 188 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLGetDevices_v4010 = 175,
|
| 189 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetEventHandle_v4010 = 176,
|
| 190 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenEventHandle_v4010 = 177,
|
| 191 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetMemHandle_v4010 = 178,
|
| 192 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenMemHandle_v4010 = 179,
|
| 193 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcCloseMemHandle_v4010 = 180,
|
| 194 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetInfo_v4010 = 181,
|
| 195 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetSharedMemConfig_v4020 = 182,
|
| 196 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetSharedMemConfig_v4020 = 183,
|
| 197 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetSharedMemConfig_v4020 = 184,
|
| 198 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v5000 = 185,
|
| 199 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroyTextureObject_v5000 = 186,
|
| 200 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceDesc_v5000 = 187,
|
| 201 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v5000 = 188,
|
| 202 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateSurfaceObject_v5000 = 189,
|
| 203 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroySurfaceObject_v5000 = 190,
|
| 204 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceObjectResourceDesc_v5000 = 191,
|
| 205 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocMipmappedArray_v5000 = 192,
|
| 206 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetMipmappedArrayLevel_v5000 = 193,
|
| 207 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeMipmappedArray_v5000 = 194,
|
| 208 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToMipmappedArray_v5000 = 195,
|
| 209 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedMipmappedArray_v5000 = 196,
|
| 210 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_v5000 = 197,
|
| 211 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithFlags_v5000 = 198,
|
| 212 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceViewDesc_v5000 = 199,
|
| 213 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetAttribute_v5000 = 200,
|
| 214 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v5050 = 201,
|
| 215 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithPriority_v5050 = 202,
|
| 216 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_v5050 = 203,
|
| 217 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_v5050 = 204,
|
| 218 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetStreamPriorityRange_v5050 = 205,
|
| 219 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocManaged_v6000 = 206,
|
| 220 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207,
|
| 221 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_v6000 = 208,
|
| 222 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorName_v6050 = 209,
|
| 223 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210,
|
| 224 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 = 211,
|
| 225 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceFlags_v7000 = 212,
|
| 226 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_ptsz_v7000 = 213,
|
| 227 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000 = 214,
|
| 228 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_ptds_v7000 = 215,
|
| 229 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_ptds_v7000 = 216,
|
| 230 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_ptds_v7000 = 217,
|
| 231 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_ptds_v7000 = 218,
|
| 232 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_ptds_v7000 = 219,
|
| 233 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_ptds_v7000 = 220,
|
| 234 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_ptds_v7000 = 221,
|
| 235 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_ptds_v7000 = 222,
|
| 236 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_ptds_v7000 = 223,
|
| 237 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_ptds_v7000 = 224,
|
| 238 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_ptsz_v7000 = 225,
|
| 239 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_ptsz_v7000 = 226,
|
| 240 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_ptsz_v7000 = 227,
|
| 241 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_ptsz_v7000 = 228,
|
| 242 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_ptsz_v7000 = 229,
|
| 243 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_ptsz_v7000 = 230,
|
| 244 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_ptsz_v7000 = 231,
|
| 245 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_ptsz_v7000 = 232,
|
| 246 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset_ptds_v7000 = 233,
|
| 247 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_ptds_v7000 = 234,
|
| 248 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_ptsz_v7000 = 235,
|
| 249 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_ptsz_v7000 = 236,
|
| 250 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_ptsz_v7000 = 237,
|
| 251 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_ptsz_v7000 = 238,
|
| 252 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_ptsz_v7000 = 239,
|
| 253 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_ptsz_v7000 = 240,
|
| 254 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_ptsz_v7000 = 241,
|
| 255 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_ptsz_v7000 = 242,
|
| 256 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_ptds_v7000 = 243,
|
| 257 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_ptsz_v7000 = 244,
|
| 258 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_ptds_v7000 = 245,
|
| 259 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_ptsz_v7000 = 246,
|
| 260 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_ptsz_v7000 = 247,
|
| 261 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_ptsz_v7000 = 248,
|
| 262 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_ptds_v7000 = 249,
|
| 263 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_ptsz_v7000 = 250,
|
| 264 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000 = 251,
|
| 265 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v8000 = 252,
|
| 266 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_ptsz_v8000 = 253,
|
| 267 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v8000 = 254,
|
| 268 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetP2PAttribute_v8000 = 255,
|
| 269 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsEGLRegisterImage_v7000 = 256,
|
| 270 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnect_v7000 = 257,
|
| 271 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerDisconnect_v7000 = 258,
|
| 272 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerAcquireFrame_v7000 = 259,
|
| 273 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerReleaseFrame_v7000 = 260,
|
| 274 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerConnect_v7000 = 261,
|
| 275 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerDisconnect_v7000 = 262,
|
| 276 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerPresentFrame_v7000 = 263,
|
| 277 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerReturnFrame_v7000 = 264,
|
| 278 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedEglFrame_v7000 = 265,
|
| 279 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttribute_v8000 = 266,
|
| 280 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttributes_v8000 = 267,
|
| 281 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnectWithFlags_v7000 = 268,
|
| 282 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000 = 269,
|
| 283 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_ptsz_v9000 = 270,
|
| 284 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateFromEGLSync_v9000 = 271,
|
| 285 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000 = 272,
|
| 286 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetAttribute_v9000 = 273,
|
| 287 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalMemory_v10000 = 274,
|
| 288 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedBuffer_v10000 = 275,
|
| 289 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedMipmappedArray_v10000 = 276,
|
| 290 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalMemory_v10000 = 277,
|
| 291 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalSemaphore_v10000 = 278,
|
| 292 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v10000 = 279,
|
| 293 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_ptsz_v10000 = 280,
|
| 294 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v10000 = 281,
|
| 295 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_ptsz_v10000 = 282,
|
| 296 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalSemaphore_v10000 = 283,
|
| 297 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_v10000 = 284,
|
| 298 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_ptsz_v10000 = 285,
|
| 299 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphCreate_v10000 = 286,
|
| 300 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetParams_v10000 = 287,
|
| 301 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetParams_v10000 = 288,
|
| 302 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddKernelNode_v10000 = 289,
|
| 303 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode_v10000 = 290,
|
| 304 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeGetParams_v10000 = 291,
|
| 305 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams_v10000 = 292,
|
| 306 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemsetNode_v10000 = 293,
|
| 307 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeGetParams_v10000 = 294,
|
| 308 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeSetParams_v10000 = 295,
|
| 309 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddHostNode_v10000 = 296,
|
| 310 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeGetParams_v10000 = 297,
|
| 311 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddChildGraphNode_v10000 = 298,
|
| 312 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphChildGraphNodeGetGraph_v10000 = 299,
|
| 313 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEmptyNode_v10000 = 300,
|
| 314 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphClone_v10000 = 301,
|
| 315 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeFindInClone_v10000 = 302,
|
| 316 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetType_v10000 = 303,
|
| 317 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetRootNodes_v10000 = 304,
|
| 318 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v10000 = 305,
|
| 319 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v10000 = 306,
|
| 320 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v10000 = 307,
|
| 321 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v10000 = 308,
|
| 322 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroyNode_v10000 = 309,
|
| 323 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v10000 = 310,
|
| 324 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_v10000 = 311,
|
| 325 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000 = 312,
|
| 326 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecDestroy_v10000 = 313,
|
| 327 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroy_v10000 = 314,
|
| 328 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_v10000 = 315,
|
| 329 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_ptsz_v10000 = 316,
|
| 330 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_v10000 = 317,
|
| 331 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_ptsz_v10000 = 318,
|
| 332 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_v10000 = 319,
|
| 333 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_ptsz_v10000 = 320,
|
| 334 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeSetParams_v10000 = 321,
|
| 335 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetNodes_v10000 = 322,
|
| 336 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v10000 = 323,
|
| 337 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v10010 = 324,
|
| 338 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_ptsz_v10010 = 325,
|
| 339 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecKernelNodeSetParams_v10010 = 326,
|
| 340 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadExchangeStreamCaptureMode_v10010 = 327,
|
| 341 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetNvSciSyncAttributes_v10020 = 328,
|
| 342 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyAvailableDynamicSMemPerBlock_v10200 = 329,
|
| 343 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_v10200 = 330,
|
| 344 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_ptsz_v10200 = 331,
|
| 345 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams_v10020 = 332,
|
| 346 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemsetNodeSetParams_v10020 = 333,
|
| 347 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecHostNodeSetParams_v10020 = 334,
|
| 348 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecUpdate_v10020 = 335,
|
| 349 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetFuncBySymbol_v11000 = 336,
|
| 350 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCtxResetPersistingL2Cache_v11000 = 337,
|
| 351 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeCopyAttributes_v11000 = 338,
|
| 352 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetAttribute_v11000 = 339,
|
| 353 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetAttribute_v11000 = 340,
|
| 354 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_v11000 = 341,
|
| 355 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_ptsz_v11000 = 342,
|
| 356 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_v11000 = 343,
|
| 357 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_ptsz_v11000 = 344,
|
| 358 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_v11000 = 345,
|
| 359 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_ptsz_v11000 = 346,
|
| 360 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetTexture1DLinearMaxWidth_v11010 = 347,
|
| 361 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_v10000 = 348,
|
| 362 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_ptsz_v10000 = 349,
|
| 363 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeToSymbol_v11010 = 350,
|
| 364 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeFromSymbol_v11010 = 351,
|
| 365 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode1D_v11010 = 352,
|
| 366 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsToSymbol_v11010 = 353,
|
| 367 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsFromSymbol_v11010 = 354,
|
| 368 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams1D_v11010 = 355,
|
| 369 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010 = 356,
|
| 370 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010 = 357,
|
| 371 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams1D_v11010 = 358,
|
| 372 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetSparseProperties_v11010 = 359,
|
| 373 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetSparseProperties_v11010 = 360,
|
| 374 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecChildGraphNodeSetParams_v11010 = 361,
|
| 375 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventRecordNode_v11010 = 362,
|
| 376 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeGetEvent_v11010 = 363,
|
| 377 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeSetEvent_v11010 = 364,
|
| 378 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventWaitNode_v11010 = 365,
|
| 379 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeGetEvent_v11010 = 366,
|
| 380 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeSetEvent_v11010 = 367,
|
| 381 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventRecordNodeSetEvent_v11010 = 368,
|
| 382 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventWaitNodeSetEvent_v11010 = 369,
|
| 383 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_v11010 = 370,
|
| 384 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_ptsz_v11010 = 371,
|
| 385 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetDefaultMemPool_v11020 = 372,
|
| 386 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_v11020 = 373,
|
| 387 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_ptsz_v11020 = 374,
|
| 388 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_v11020 = 375,
|
| 389 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_ptsz_v11020 = 376,
|
| 390 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolTrimTo_v11020 = 377,
|
| 391 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAttribute_v11020 = 378,
|
| 392 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAttribute_v11020 = 379,
|
| 393 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAccess_v11020 = 380,
|
| 394 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetPlane_v11020 = 381,
|
| 395 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAccess_v11020 = 382,
|
| 396 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolCreate_v11020 = 383,
|
| 397 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolDestroy_v11020 = 384,
|
| 398 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetMemPool_v11020 = 385,
|
| 399 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetMemPool_v11020 = 386,
|
| 400 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportToShareableHandle_v11020 = 387,
|
| 401 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportFromShareableHandle_v11020 = 388,
|
| 402 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportPointer_v11020 = 389,
|
| 403 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportPointer_v11020 = 390,
|
| 404 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_v11020 = 391,
|
| 405 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_ptsz_v11020 = 392,
|
| 406 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_v11020 = 393,
|
| 407 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020 = 394,
|
| 408 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_v11020 = 395,
|
| 409 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020 = 396,
|
| 410 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresSignalNode_v11020 = 397,
|
| 411 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeGetParams_v11020 = 398,
|
| 412 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeSetParams_v11020 = 399,
|
| 413 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresWaitNode_v11020 = 400,
|
| 414 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeGetParams_v11020 = 401,
|
| 415 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeSetParams_v11020 = 402,
|
| 416 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020 = 403,
|
| 417 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020 = 404,
|
| 418 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceFlushGPUDirectRDMAWrites_v11030 = 405,
|
| 419 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_v11030 = 406,
|
| 420 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_ptsz_v11030 = 407,
|
| 421 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphDebugDotPrint_v11030 = 408,
|
| 422 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_v11030 = 409,
|
| 423 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_ptsz_v11030 = 410,
|
| 424 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v11030 = 411,
|
| 425 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_ptsz_v11030 = 412,
|
| 426 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectCreate_v11030 = 413,
|
| 427 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRetain_v11030 = 414,
|
| 428 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRelease_v11030 = 415,
|
| 429 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphRetainUserObject_v11030 = 416,
|
| 430 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphReleaseUserObject_v11030 = 417,
|
| 431 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithFlags_v11040 = 418,
|
| 432 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemAllocNode_v11040 = 419,
|
| 433 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemAllocNodeGetParams_v11040 = 420,
|
| 434 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemFreeNode_v11040 = 421,
|
| 435 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemFreeNodeGetParams_v11040 = 422,
|
| 436 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGraphMemTrim_v11040 = 423,
|
| 437 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetGraphMemAttribute_v11040 = 424,
|
| 438 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetGraphMemAttribute_v11040 = 425,
|
| 439 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetEnabled_v11060 = 426,
|
| 440 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetEnabled_v11060 = 427,
|
| 441 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetMemoryRequirements_v11060 = 428,
|
| 442 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetMemoryRequirements_v11060 = 429,
|
| 443 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060 = 430,
|
| 444 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060 = 431,
|
| 445 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxPotentialClusterSize_v11070 = 432,
|
| 446 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveClusters_v11070 = 433,
|
| 447 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v2_v11080 = 434,
|
| 448 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v2_v11080 = 435,
|
| 449 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithParams_v12000 = 436,
|
| 450 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithParams_ptsz_v12000 = 437,
|
| 451 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecGetFlags_v12000 = 438,
|
| 452 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetKernel_v12000 = 439,
|
| 453 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v2_v12000 = 440,
|
| 454 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetId_v12000 = 441,
|
| 455 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetId_ptsz_v12000 = 442,
|
| 456 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v12000 = 443,
|
| 457 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaInitDevice_v12000 = 444,
|
| 458 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddNode_v12020 = 445,
|
| 459 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetParams_v12020 = 446,
|
| 460 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecNodeSetParams_v12020 = 447,
|
| 461 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v2_v12020 = 448,
|
| 462 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v2_v12020 = 449,
|
| 463 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v2_ptsz_v12020 = 450,
|
| 464 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetName_v12030 = 451,
|
| 465 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCaptureToGraph_v12030 = 452,
|
| 466 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCaptureToGraph_ptsz_v12030 = 453,
|
| 467 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphConditionalHandleCreate_v12030 = 454,
|
| 468 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v2_v12030 = 455,
|
| 469 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v2_v12030 = 456,
|
| 470 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v2_v12030 = 457,
|
| 471 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v2_v12030 = 458,
|
| 472 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v2_v12030 = 459,
|
| 473 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddNode_v2_v12030 = 460,
|
| 474 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v3_v12030 = 461,
|
| 475 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v3_ptsz_v12030 = 462,
|
| 476 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v2_v12030 = 463,
|
| 477 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v2_ptsz_v12030 = 464,
|
| 478 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceRegisterAsyncNotification_v12040 = 465,
|
| 479 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceUnregisterAsyncNotification_v12040 = 466,
|
| 480 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetParamInfo_v12040 = 467,
|
| 481 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPointByVersion_v12050 = 468,
|
| 482 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPointByVersion_ptsz_v12050 = 469,
|
| 483 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda470_v12060 = 470,
|
| 484 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda471_v12060 = 471,
|
| 485 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda472_v12060 = 472,
|
| 486 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda473_v12060 = 473,
|
| 487 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda474_v12060 = 474,
|
| 488 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda475_v12060 = 475,
|
| 489 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda476_v12060 = 476,
|
| 490 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda477_v12060 = 477,
|
| 491 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda478_v12060 = 478,
|
| 492 |
+
CUPTI_RUNTIME_TRACE_CBID_cuda479_v12060 = 479,
|
| 493 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetDevice_v12080 = 480,
|
| 494 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetDevice_ptsz_v12080 = 481,
|
| 495 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyBatchAsync_v12080 = 482,
|
| 496 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyBatchAsync_ptsz_v12080 = 483,
|
| 497 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DBatchAsync_v12080 = 484,
|
| 498 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DBatchAsync_ptsz_v12080 = 485,
|
| 499 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v2_v12080 = 486,
|
| 500 |
+
CUPTI_RUNTIME_TRACE_CBID_SIZE = 487,
|
| 501 |
+
CUPTI_RUNTIME_TRACE_CBID_FORCE_INT = 0x7fffffff
|
| 502 |
+
} CUpti_runtime_api_trace_cbid;
|
| 503 |
+
|
| 504 |
+
#endif
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/device_functions.h
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2018 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__)
|
| 51 |
+
#if defined(_MSC_VER)
|
| 52 |
+
#pragma message("device_functions.h is an internal header file and must not be used directly. This file will be removed in a future CUDA release. Please use cuda_runtime_api.h or cuda_runtime.h instead.")
|
| 53 |
+
#else
|
| 54 |
+
#warning "device_functions.h is an internal header file and must not be used directly. This file will be removed in a future CUDA release. Please use cuda_runtime_api.h or cuda_runtime.h instead."
|
| 55 |
+
#endif
|
| 56 |
+
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
| 57 |
+
#define __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DEVICE_FUNCTIONS_H_WRAPPER__
|
| 58 |
+
#endif
|
| 59 |
+
|
| 60 |
+
#include "crt/device_functions.h"
|
| 61 |
+
|
| 62 |
+
#if defined(__UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DEVICE_FUNCTIONS_H_WRAPPER__)
|
| 63 |
+
#undef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
| 64 |
+
#undef __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DEVICE_FUNCTIONS_H_WRAPPER__
|
| 65 |
+
#endif
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/library_types.h
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(__LIBRARY_TYPES_H__)
|
| 51 |
+
#define __LIBRARY_TYPES_H__
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
#ifndef __CUDACC_RTC_MINIMAL__
|
| 56 |
+
|
| 57 |
+
typedef enum cudaDataType_t
|
| 58 |
+
{
|
| 59 |
+
CUDA_R_16F = 2, /* real as a half */
|
| 60 |
+
CUDA_C_16F = 6, /* complex as a pair of half numbers */
|
| 61 |
+
CUDA_R_16BF = 14, /* real as a nv_bfloat16 */
|
| 62 |
+
CUDA_C_16BF = 15, /* complex as a pair of nv_bfloat16 numbers */
|
| 63 |
+
CUDA_R_32F = 0, /* real as a float */
|
| 64 |
+
CUDA_C_32F = 4, /* complex as a pair of float numbers */
|
| 65 |
+
CUDA_R_64F = 1, /* real as a double */
|
| 66 |
+
CUDA_C_64F = 5, /* complex as a pair of double numbers */
|
| 67 |
+
CUDA_R_4I = 16, /* real as a signed 4-bit int */
|
| 68 |
+
CUDA_C_4I = 17, /* complex as a pair of signed 4-bit int numbers */
|
| 69 |
+
CUDA_R_4U = 18, /* real as a unsigned 4-bit int */
|
| 70 |
+
CUDA_C_4U = 19, /* complex as a pair of unsigned 4-bit int numbers */
|
| 71 |
+
CUDA_R_8I = 3, /* real as a signed 8-bit int */
|
| 72 |
+
CUDA_C_8I = 7, /* complex as a pair of signed 8-bit int numbers */
|
| 73 |
+
CUDA_R_8U = 8, /* real as a unsigned 8-bit int */
|
| 74 |
+
CUDA_C_8U = 9, /* complex as a pair of unsigned 8-bit int numbers */
|
| 75 |
+
CUDA_R_16I = 20, /* real as a signed 16-bit int */
|
| 76 |
+
CUDA_C_16I = 21, /* complex as a pair of signed 16-bit int numbers */
|
| 77 |
+
CUDA_R_16U = 22, /* real as a unsigned 16-bit int */
|
| 78 |
+
CUDA_C_16U = 23, /* complex as a pair of unsigned 16-bit int numbers */
|
| 79 |
+
CUDA_R_32I = 10, /* real as a signed 32-bit int */
|
| 80 |
+
CUDA_C_32I = 11, /* complex as a pair of signed 32-bit int numbers */
|
| 81 |
+
CUDA_R_32U = 12, /* real as a unsigned 32-bit int */
|
| 82 |
+
CUDA_C_32U = 13, /* complex as a pair of unsigned 32-bit int numbers */
|
| 83 |
+
CUDA_R_64I = 24, /* real as a signed 64-bit int */
|
| 84 |
+
CUDA_C_64I = 25, /* complex as a pair of signed 64-bit int numbers */
|
| 85 |
+
CUDA_R_64U = 26, /* real as a unsigned 64-bit int */
|
| 86 |
+
CUDA_C_64U = 27, /* complex as a pair of unsigned 64-bit int numbers */
|
| 87 |
+
CUDA_R_8F_E4M3 = 28, /* real as a nv_fp8_e4m3 */
|
| 88 |
+
CUDA_R_8F_UE4M3 = CUDA_R_8F_E4M3, /* real as an unsigned nv_fp8_e4m3 */
|
| 89 |
+
CUDA_R_8F_E5M2 = 29, /* real as a nv_fp8_e5m2 */
|
| 90 |
+
CUDA_R_8F_UE8M0 = 30, /* real as an exponent-only unsigned nv_fp8_e8m0 */
|
| 91 |
+
CUDA_R_6F_E2M3 = 31, /* real as a nv_fp6_e2m3 */
|
| 92 |
+
CUDA_R_6F_E3M2 = 32, /* real as a nv_fp6_e3m2 */
|
| 93 |
+
CUDA_R_4F_E2M1 = 33, /* real as a nv_fp4_e2m1 */
|
| 94 |
+
} cudaDataType;
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
typedef enum libraryPropertyType_t
|
| 98 |
+
{
|
| 99 |
+
MAJOR_VERSION,
|
| 100 |
+
MINOR_VERSION,
|
| 101 |
+
PATCH_LEVEL
|
| 102 |
+
} libraryPropertyType;
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
#ifndef __cplusplus
|
| 106 |
+
typedef enum cudaDataType_t cudaDataType_t;
|
| 107 |
+
typedef enum libraryPropertyType_t libraryPropertyType_t;
|
| 108 |
+
#endif
|
| 109 |
+
|
| 110 |
+
#endif /* !__CUDACC_RTC_MINIMAL__ */
|
| 111 |
+
#endif /* !__LIBRARY_TYPES_H__ */
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/nvperf_cuda_host.h
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef NVPERF_CUDA_HOST_H
|
| 2 |
+
#define NVPERF_CUDA_HOST_H
|
| 3 |
+
|
| 4 |
+
/*
|
| 5 |
+
* Copyright 2014-2024 NVIDIA Corporation. All rights reserved.
|
| 6 |
+
*
|
| 7 |
+
* NOTICE TO USER:
|
| 8 |
+
*
|
| 9 |
+
* This source code is subject to NVIDIA ownership rights under U.S. and
|
| 10 |
+
* international Copyright laws.
|
| 11 |
+
*
|
| 12 |
+
* This software and the information contained herein is PROPRIETARY and
|
| 13 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
|
| 14 |
+
* of a form of NVIDIA software license agreement.
|
| 15 |
+
*
|
| 16 |
+
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
| 17 |
+
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
| 18 |
+
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
| 19 |
+
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
| 20 |
+
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 21 |
+
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
| 22 |
+
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
| 23 |
+
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
| 24 |
+
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
| 25 |
+
* OR PERFORMANCE OF THIS SOURCE CODE.
|
| 26 |
+
*
|
| 27 |
+
* U.S. Government End Users. This source code is a "commercial item" as
|
| 28 |
+
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
| 29 |
+
* "commercial computer software" and "commercial computer software
|
| 30 |
+
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
| 31 |
+
* and is provided to the U.S. Government only as a commercial end item.
|
| 32 |
+
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
| 33 |
+
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
| 34 |
+
* source code with only those rights set forth herein.
|
| 35 |
+
*
|
| 36 |
+
* Any use of this source code in individual and commercial software must
|
| 37 |
+
* include, in the user documentation and internal comments to the code,
|
| 38 |
+
* the above Disclaimer and U.S. Government End Users Notice.
|
| 39 |
+
*/
|
| 40 |
+
|
| 41 |
+
#include <stddef.h>
|
| 42 |
+
#include <stdint.h>
|
| 43 |
+
#include "nvperf_common.h"
|
| 44 |
+
#include "nvperf_host.h"
|
| 45 |
+
|
| 46 |
+
#if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
|
| 47 |
+
#pragma GCC visibility push(default)
|
| 48 |
+
#if !defined(NVPW_LOCAL)
|
| 49 |
+
#define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
|
| 50 |
+
#endif
|
| 51 |
+
#else
|
| 52 |
+
#if !defined(NVPW_LOCAL)
|
| 53 |
+
#define NVPW_LOCAL
|
| 54 |
+
#endif
|
| 55 |
+
#endif
|
| 56 |
+
|
| 57 |
+
#ifdef __cplusplus
|
| 58 |
+
extern "C" {
|
| 59 |
+
#endif
|
| 60 |
+
|
| 61 |
+
/**
|
| 62 |
+
* @file nvperf_cuda_host.h
|
| 63 |
+
*/
|
| 64 |
+
|
| 65 |
+
typedef struct NVPW_CUDA_RawMetricsConfig_Create_Params
|
| 66 |
+
{
|
| 67 |
+
/// [in]
|
| 68 |
+
size_t structSize;
|
| 69 |
+
/// [in] assign to NULL
|
| 70 |
+
void* pPriv;
|
| 71 |
+
/// [in]
|
| 72 |
+
NVPA_ActivityKind activityKind;
|
| 73 |
+
/// [in]
|
| 74 |
+
const char* pChipName;
|
| 75 |
+
/// [out] new NVPA_RawMetricsConfig object
|
| 76 |
+
struct NVPA_RawMetricsConfig* pRawMetricsConfig;
|
| 77 |
+
} NVPW_CUDA_RawMetricsConfig_Create_Params;
|
| 78 |
+
#define NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_Params, pRawMetricsConfig)
|
| 79 |
+
|
| 80 |
+
NVPA_Status NVPW_CUDA_RawMetricsConfig_Create(NVPW_CUDA_RawMetricsConfig_Create_Params* pParams);
|
| 81 |
+
|
| 82 |
+
typedef struct NVPW_CUDA_RawMetricsConfig_Create_V2_Params
|
| 83 |
+
{
|
| 84 |
+
/// [in]
|
| 85 |
+
size_t structSize;
|
| 86 |
+
/// [in] assign to NULL
|
| 87 |
+
void* pPriv;
|
| 88 |
+
/// [in]
|
| 89 |
+
NVPA_ActivityKind activityKind;
|
| 90 |
+
/// [in] accepted for chips supported at the time-of-release.
|
| 91 |
+
const char* pChipName;
|
| 92 |
+
/// [in] buffer with counter availability image - required for future chip support
|
| 93 |
+
const uint8_t* pCounterAvailabilityImage;
|
| 94 |
+
/// [out] new NVPA_RawMetricsConfig object
|
| 95 |
+
struct NVPA_RawMetricsConfig* pRawMetricsConfig;
|
| 96 |
+
} NVPW_CUDA_RawMetricsConfig_Create_V2_Params;
|
| 97 |
+
#define NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_V2_Params, pRawMetricsConfig)
|
| 98 |
+
|
| 99 |
+
/// Use either 'pChipName' or 'pCounterAvailabilityImage'.
|
| 100 |
+
NVPA_Status NVPW_CUDA_RawMetricsConfig_Create_V2(NVPW_CUDA_RawMetricsConfig_Create_V2_Params* pParams);
|
| 101 |
+
|
| 102 |
+
typedef struct NVPW_CUDA_CounterDataBuilder_Create_Params
|
| 103 |
+
{
|
| 104 |
+
/// [in]
|
| 105 |
+
size_t structSize;
|
| 106 |
+
/// [in] assign to NULL
|
| 107 |
+
void* pPriv;
|
| 108 |
+
/// [in] accepted for chips supported at the time-of-release.
|
| 109 |
+
const char* pChipName;
|
| 110 |
+
/// [in] buffer with counter availability image - required for future chip support
|
| 111 |
+
const uint8_t* pCounterAvailabilityImage;
|
| 112 |
+
/// [out] new NVPA_CounterDataBuilder object
|
| 113 |
+
struct NVPA_CounterDataBuilder* pCounterDataBuilder;
|
| 114 |
+
} NVPW_CUDA_CounterDataBuilder_Create_Params;
|
| 115 |
+
#define NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_CounterDataBuilder_Create_Params, pCounterDataBuilder)
|
| 116 |
+
|
| 117 |
+
/// Use either 'pChipName' or 'pCounterAvailabilityImage'.
|
| 118 |
+
NVPA_Status NVPW_CUDA_CounterDataBuilder_Create(NVPW_CUDA_CounterDataBuilder_Create_Params* pParams);
|
| 119 |
+
|
| 120 |
+
typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator;
|
| 121 |
+
|
| 122 |
+
typedef struct NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params
|
| 123 |
+
{
|
| 124 |
+
/// [in]
|
| 125 |
+
size_t structSize;
|
| 126 |
+
/// [in] assign to NULL
|
| 127 |
+
void* pPriv;
|
| 128 |
+
/// [in] accepted for chips supported at the time-of-release.
|
| 129 |
+
const char* pChipName;
|
| 130 |
+
/// [in] buffer with counter availability image - required for future chip support
|
| 131 |
+
const uint8_t* pCounterAvailabilityImage;
|
| 132 |
+
/// [out]
|
| 133 |
+
size_t scratchBufferSize;
|
| 134 |
+
} NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params;
|
| 135 |
+
#define NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params, scratchBufferSize)
|
| 136 |
+
|
| 137 |
+
/// Use either 'pChipName' or 'pCounterAvailabilityImage'.
|
| 138 |
+
NVPA_Status NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params* pParams);
|
| 139 |
+
|
| 140 |
+
typedef struct NVPW_CUDA_MetricsEvaluator_Initialize_Params
|
| 141 |
+
{
|
| 142 |
+
/// [in]
|
| 143 |
+
size_t structSize;
|
| 144 |
+
/// [in] assign to NULL
|
| 145 |
+
void* pPriv;
|
| 146 |
+
/// [in]
|
| 147 |
+
uint8_t* pScratchBuffer;
|
| 148 |
+
/// [in] the size of the 'pScratchBuffer' array, should be at least the size of the 'scratchBufferSize' returned
|
| 149 |
+
/// by 'NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize'
|
| 150 |
+
size_t scratchBufferSize;
|
| 151 |
+
/// [in] accepted for chips supported at the time-of-release.
|
| 152 |
+
const char* pChipName;
|
| 153 |
+
/// [in] buffer with counter availability image - required for future chip support
|
| 154 |
+
const uint8_t* pCounterAvailabilityImage;
|
| 155 |
+
/// [in]
|
| 156 |
+
const uint8_t* pCounterDataImage;
|
| 157 |
+
/// [in] must be provided if 'pCounterDataImage' is not NULL
|
| 158 |
+
size_t counterDataImageSize;
|
| 159 |
+
/// [out]
|
| 160 |
+
struct NVPW_MetricsEvaluator* pMetricsEvaluator;
|
| 161 |
+
} NVPW_CUDA_MetricsEvaluator_Initialize_Params;
|
| 162 |
+
#define NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_Initialize_Params, pMetricsEvaluator)
|
| 163 |
+
|
| 164 |
+
/// Use one of 'pChipName', 'pCounterAvailabilityImage', or 'pCounterDataImage'. 'pChipName' or
|
| 165 |
+
/// 'pCounterAvailabilityImage' will create a metrics evaluator based on a virtual device while 'pCounterDataImage'
|
| 166 |
+
/// will create a metrics evaluator based on the actual device.
|
| 167 |
+
NVPA_Status NVPW_CUDA_MetricsEvaluator_Initialize(NVPW_CUDA_MetricsEvaluator_Initialize_Params* pParams);
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
#ifdef __cplusplus
|
| 172 |
+
} // extern "C"
|
| 173 |
+
#endif
|
| 174 |
+
|
| 175 |
+
#if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
|
| 176 |
+
#pragma GCC visibility pop
|
| 177 |
+
#endif
|
| 178 |
+
|
| 179 |
+
#endif // NVPERF_CUDA_HOST_H
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_32_intrinsics.hpp
ADDED
|
@@ -0,0 +1,588 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(__SM_32_INTRINSICS_HPP__)
|
| 51 |
+
// Include guard: defined once so the #if !defined(__SM_32_INTRINSICS_HPP__) test skips this header on re-inclusion.
#define __SM_32_INTRINSICS_HPP__
|
| 52 |
+
|
| 53 |
+
#if defined(__CUDACC_RTC__)
|
| 54 |
+
// Builds with __CUDACC_RTC__ defined: every intrinsic below is declared with __device__ only.
#define __SM_32_INTRINSICS_DECL__ __device__
|
| 55 |
+
#else /* !__CUDACC_RTC__ */
|
| 56 |
+
// Builds without __CUDACC_RTC__: every intrinsic below is declared static __device__ __inline__.
#define __SM_32_INTRINSICS_DECL__ static __device__ __inline__
|
| 57 |
+
#endif /* __CUDACC_RTC__ */
|
| 58 |
+
|
| 59 |
+
#if defined(__cplusplus) && defined(__CUDACC__)
|
| 60 |
+
|
| 61 |
+
#if defined(_NVHPC_CUDA) || !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
|
| 62 |
+
|
| 63 |
+
/*******************************************************************************
|
| 64 |
+
* *
|
| 65 |
+
* *
|
| 66 |
+
* *
|
| 67 |
+
*******************************************************************************/
|
| 68 |
+
|
| 69 |
+
#include "cuda_runtime_api.h"
|
| 70 |
+
|
| 71 |
+
// In here are intrinsics which are built in to the compiler. These may be
|
| 72 |
+
// referenced by intrinsic implementations from this file.
|
| 73 |
+
extern "C"
|
| 74 |
+
{
|
| 75 |
+
// There are no intrinsics built in to the compiler for SM-3.5,
|
| 76 |
+
// all intrinsics are now implemented as inline PTX below.
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
/*******************************************************************************
|
| 80 |
+
* *
|
| 81 |
+
* Below are implementations of SM-3.5 intrinsics which are included as *
|
| 82 |
+
* source (instead of being built in to the compiler) *
|
| 83 |
+
* *
|
| 84 |
+
*******************************************************************************/
|
| 85 |
+
|
| 86 |
+
// LDG is a "load from global via texture path" command which can exhibit higher
|
| 87 |
+
// bandwidth on GK110 than a regular LD.
|
| 88 |
+
// Select the inline-asm constraint used for pointer operands: "l" on 64-bit targets, "r" on 32-bit targets.
|
| 89 |
+
#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
|
| 90 |
+
// Pointer operands of the asm statements below use the "l" constraint when the enclosing #if holds (64-bit MSVC, __LP64__, or __CUDACC_RTC__).
#define __LDG_PTR "l"
|
| 91 |
+
#else
|
| 92 |
+
// Fallback (#else branch): pointer operands of the asm statements below use the "r" constraint.
#define __LDG_PTR "r"
|
| 93 |
+
#endif
|
| 94 |
+
|
| 95 |
+
/******************************************************************************
|
| 96 |
+
* __ldg *
|
| 97 |
+
******************************************************************************/
|
| 98 |
+
|
| 99 |
+
// Size of long is architecture and OS specific.
|
| 100 |
+
#if defined(__LP64__) // 64 bits
|
| 101 |
+
// __LP64__ branch: loads a long from ptr as a 64-bit value via PTX ld.global.nc.s64 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 102 |
+
// __LP64__ branch: loads an unsigned long from ptr as a 64-bit value via PTX ld.global.nc.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 103 |
+
#else // 32 bits
|
| 104 |
+
// Non-__LP64__ branch: loads a long from ptr as a 32-bit value via PTX ld.global.nc.s32 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 105 |
+
// Non-__LP64__ branch: loads an unsigned long from ptr as a 32-bit value via PTX ld.global.nc.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 106 |
+
#endif
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
// Loads one char from ptr via PTX ld.global.nc.s8; the asm writes an unsigned int temporary that is cast down to char.
__SM_32_INTRINSICS_DECL__ char __ldg(const char *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
|
| 110 |
+
// Loads one signed char from ptr via PTX ld.global.nc.s8; the asm writes an unsigned int temporary that is cast down to signed char.
__SM_32_INTRINSICS_DECL__ signed char __ldg(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
|
| 111 |
+
// Loads one short from ptr via PTX ld.global.nc.s16 into an unsigned short temporary, then casts it to short.
__SM_32_INTRINSICS_DECL__ short __ldg(const short *ptr) { unsigned short ret; asm volatile ("ld.global.nc.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
|
| 112 |
+
// Loads one int from ptr via PTX ld.global.nc.s32 into an unsigned int temporary, then casts it to int.
__SM_32_INTRINSICS_DECL__ int __ldg(const int *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
|
| 113 |
+
// Loads one long long from ptr via PTX ld.global.nc.s64 into an unsigned long long temporary, then casts it to long long.
__SM_32_INTRINSICS_DECL__ long long __ldg(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.nc.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
|
| 114 |
+
// Loads a char2 from ptr with one PTX ld.global.nc.v2.s8; both components land in an int2 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char2 __ldg(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.nc.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
|
| 115 |
+
// Loads a char4 from ptr with one PTX ld.global.nc.v4.s8; the four components land in an int4 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char4 __ldg(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.nc.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
|
| 116 |
+
// Loads a short2 from ptr with one PTX ld.global.nc.v2.s16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short2 __ldg(const short2 *ptr) { short2 ret; asm volatile ("ld.global.nc.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 117 |
+
// Loads a short4 from ptr with one PTX ld.global.nc.v4.s16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short4 __ldg(const short4 *ptr) { short4 ret; asm volatile ("ld.global.nc.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 118 |
+
// Loads an int2 from ptr with one PTX ld.global.nc.v2.s32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int2 __ldg(const int2 *ptr) { int2 ret; asm volatile ("ld.global.nc.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 119 |
+
// Loads an int4 from ptr with one PTX ld.global.nc.v4.s32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int4 __ldg(const int4 *ptr) { int4 ret; asm volatile ("ld.global.nc.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 120 |
+
// Loads a longlong2 from ptr with one PTX ld.global.nc.v2.s64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ longlong2 __ldg(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.nc.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 121 |
+
|
| 122 |
+
// Loads one unsigned char from ptr via PTX ld.global.nc.u8; the asm writes an unsigned int temporary that is cast down to unsigned char.
__SM_32_INTRINSICS_DECL__ unsigned char __ldg(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.nc.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
|
| 123 |
+
// Loads one unsigned short from ptr via PTX ld.global.nc.u16 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned short __ldg(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.nc.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 124 |
+
// Loads one unsigned int from ptr via PTX ld.global.nc.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned int __ldg(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.nc.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 125 |
+
// Loads one unsigned long long from ptr via PTX ld.global.nc.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long long __ldg(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.nc.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 126 |
+
// Loads a uchar2 from ptr with one PTX ld.global.nc.v2.u8; both components land in a uint2 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar2 __ldg(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.nc.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
|
| 127 |
+
// Loads a uchar4 from ptr with one PTX ld.global.nc.v4.u8; the four components land in a uint4 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar4 __ldg(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.nc.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
|
| 128 |
+
// Loads a ushort2 from ptr with one PTX ld.global.nc.v2.u16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort2 __ldg(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.nc.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 129 |
+
// Loads a ushort4 from ptr with one PTX ld.global.nc.v4.u16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort4 __ldg(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.nc.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 130 |
+
// Loads a uint2 from ptr with one PTX ld.global.nc.v2.u32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint2 __ldg(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.nc.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 131 |
+
// Loads a uint4 from ptr with one PTX ld.global.nc.v4.u32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint4 __ldg(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.nc.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 132 |
+
// Loads a ulonglong2 from ptr with one PTX ld.global.nc.v2.u64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ulonglong2 __ldg(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.nc.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 133 |
+
|
| 134 |
+
// Loads one float from ptr via PTX ld.global.nc.f32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ float __ldg(const float *ptr) { float ret; asm volatile ("ld.global.nc.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 135 |
+
// Loads one double from ptr via PTX ld.global.nc.f64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ double __ldg(const double *ptr) { double ret; asm volatile ("ld.global.nc.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 136 |
+
// Loads a float2 from ptr with one PTX ld.global.nc.v2.f32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float2 __ldg(const float2 *ptr) { float2 ret; asm volatile ("ld.global.nc.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 137 |
+
// Loads a float4 from ptr with one PTX ld.global.nc.v4.f32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float4 __ldg(const float4 *ptr) { float4 ret; asm volatile ("ld.global.nc.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 138 |
+
// Loads a double2 from ptr with one PTX ld.global.nc.v2.f64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ double2 __ldg(const double2 *ptr) { double2 ret; asm volatile ("ld.global.nc.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
/******************************************************************************
|
| 142 |
+
* __ldcg *
|
| 143 |
+
******************************************************************************/
|
| 144 |
+
|
| 145 |
+
// Size of long is architecture and OS specific.
|
| 146 |
+
#if defined(__LP64__) // 64 bits
|
| 147 |
+
// __LP64__ branch: loads a long from ptr as a 64-bit value via PTX ld.global.cg.s64 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldcg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 148 |
+
// __LP64__ branch: loads an unsigned long from ptr as a 64-bit value via PTX ld.global.cg.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldcg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 149 |
+
#else // 32 bits
|
| 150 |
+
// Non-__LP64__ branch: loads a long from ptr as a 32-bit value via PTX ld.global.cg.s32 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldcg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 151 |
+
// Non-__LP64__ branch: loads an unsigned long from ptr as a 32-bit value via PTX ld.global.cg.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldcg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 152 |
+
#endif
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
// Loads one char from ptr via PTX ld.global.cg.s8; the asm writes an unsigned int temporary that is cast down to char.
__SM_32_INTRINSICS_DECL__ char __ldcg(const char *ptr) { unsigned int ret; asm volatile ("ld.global.cg.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
|
| 156 |
+
// Loads one signed char from ptr via PTX ld.global.cg.s8; the asm writes an unsigned int temporary that is cast down to signed char.
__SM_32_INTRINSICS_DECL__ signed char __ldcg(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.cg.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
|
| 157 |
+
// Loads one short from ptr via PTX ld.global.cg.s16 into an unsigned short temporary, then casts it to short.
__SM_32_INTRINSICS_DECL__ short __ldcg(const short *ptr) { unsigned short ret; asm volatile ("ld.global.cg.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
|
| 158 |
+
// Loads one int from ptr via PTX ld.global.cg.s32 into an unsigned int temporary, then casts it to int.
__SM_32_INTRINSICS_DECL__ int __ldcg(const int *ptr) { unsigned int ret; asm volatile ("ld.global.cg.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
|
| 159 |
+
// Loads one long long from ptr via PTX ld.global.cg.s64 into an unsigned long long temporary, then casts it to long long.
__SM_32_INTRINSICS_DECL__ long long __ldcg(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cg.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
|
| 160 |
+
// Loads a char2 from ptr with one PTX ld.global.cg.v2.s8; both components land in an int2 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char2 __ldcg(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.cg.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
|
| 161 |
+
// Loads a char4 from ptr with one PTX ld.global.cg.v4.s8; the four components land in an int4 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char4 __ldcg(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.cg.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
|
| 162 |
+
// Loads a short2 from ptr with one PTX ld.global.cg.v2.s16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short2 __ldcg(const short2 *ptr) { short2 ret; asm volatile ("ld.global.cg.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 163 |
+
// Loads a short4 from ptr with one PTX ld.global.cg.v4.s16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short4 __ldcg(const short4 *ptr) { short4 ret; asm volatile ("ld.global.cg.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 164 |
+
// Loads an int2 from ptr with one PTX ld.global.cg.v2.s32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int2 __ldcg(const int2 *ptr) { int2 ret; asm volatile ("ld.global.cg.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 165 |
+
// Loads an int4 from ptr with one PTX ld.global.cg.v4.s32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int4 __ldcg(const int4 *ptr) { int4 ret; asm volatile ("ld.global.cg.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 166 |
+
// Loads a longlong2 from ptr with one PTX ld.global.cg.v2.s64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ longlong2 __ldcg(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.cg.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 167 |
+
|
| 168 |
+
// Loads one unsigned char from ptr via PTX ld.global.cg.u8; the asm writes an unsigned int temporary that is cast down to unsigned char.
__SM_32_INTRINSICS_DECL__ unsigned char __ldcg(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.cg.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
|
| 169 |
+
// Loads one unsigned short from ptr via PTX ld.global.cg.u16 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned short __ldcg(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.cg.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 170 |
+
// Loads one unsigned int from ptr via PTX ld.global.cg.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned int __ldcg(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.cg.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 171 |
+
// Loads one unsigned long long from ptr via PTX ld.global.cg.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long long __ldcg(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cg.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 172 |
+
// Loads a uchar2 from ptr with one PTX ld.global.cg.v2.u8; both components land in a uint2 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar2 __ldcg(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.cg.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
|
| 173 |
+
// Loads a uchar4 from ptr with one PTX ld.global.cg.v4.u8; the four components land in a uint4 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar4 __ldcg(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.cg.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
|
| 174 |
+
// Loads a ushort2 from ptr with one PTX ld.global.cg.v2.u16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort2 __ldcg(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.cg.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 175 |
+
// Loads a ushort4 from ptr with one PTX ld.global.cg.v4.u16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort4 __ldcg(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.cg.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 176 |
+
// Loads a uint2 from ptr with one PTX ld.global.cg.v2.u32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint2 __ldcg(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.cg.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 177 |
+
// Loads a uint4 from ptr with one PTX ld.global.cg.v4.u32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint4 __ldcg(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.cg.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 178 |
+
// Loads a ulonglong2 from ptr with one PTX ld.global.cg.v2.u64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ulonglong2 __ldcg(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.cg.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 179 |
+
|
| 180 |
+
// Loads one float from ptr via PTX ld.global.cg.f32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ float __ldcg(const float *ptr) { float ret; asm volatile ("ld.global.cg.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 181 |
+
// Loads one double from ptr via PTX ld.global.cg.f64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ double __ldcg(const double *ptr) { double ret; asm volatile ("ld.global.cg.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 182 |
+
// Loads a float2 from ptr with one PTX ld.global.cg.v2.f32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float2 __ldcg(const float2 *ptr) { float2 ret; asm volatile ("ld.global.cg.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 183 |
+
// Loads a float4 from ptr with one PTX ld.global.cg.v4.f32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float4 __ldcg(const float4 *ptr) { float4 ret; asm volatile ("ld.global.cg.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 184 |
+
// Loads a double2 from ptr with one PTX ld.global.cg.v2.f64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ double2 __ldcg(const double2 *ptr) { double2 ret; asm volatile ("ld.global.cg.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 185 |
+
|
| 186 |
+
/******************************************************************************
|
| 187 |
+
* __ldca *
|
| 188 |
+
******************************************************************************/
|
| 189 |
+
|
| 190 |
+
// Size of long is architecture and OS specific.
|
| 191 |
+
#if defined(__LP64__) // 64 bits
|
| 192 |
+
// __LP64__ branch: loads a long from ptr as a 64-bit value via PTX ld.global.ca.s64 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldca(const long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 193 |
+
// __LP64__ branch: loads an unsigned long from ptr as a 64-bit value via PTX ld.global.ca.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldca(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 194 |
+
#else // 32 bits
|
| 195 |
+
// Non-__LP64__ branch: loads a long from ptr as a 32-bit value via PTX ld.global.ca.s32 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldca(const long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 196 |
+
// Non-__LP64__ branch: loads an unsigned long from ptr as a 32-bit value via PTX ld.global.ca.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldca(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 197 |
+
#endif
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
// Loads one char from ptr via PTX ld.global.ca.s8; the asm writes an unsigned int temporary that is cast down to char.
__SM_32_INTRINSICS_DECL__ char __ldca(const char *ptr) { unsigned int ret; asm volatile ("ld.global.ca.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
|
| 201 |
+
// Loads one signed char from ptr via PTX ld.global.ca.s8; the asm writes an unsigned int temporary that is cast down to signed char.
__SM_32_INTRINSICS_DECL__ signed char __ldca(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.ca.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
|
| 202 |
+
// Loads one short from ptr via PTX ld.global.ca.s16 into an unsigned short temporary, then casts it to short.
__SM_32_INTRINSICS_DECL__ short __ldca(const short *ptr) { unsigned short ret; asm volatile ("ld.global.ca.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
|
| 203 |
+
// Loads one int from ptr via PTX ld.global.ca.s32 into an unsigned int temporary, then casts it to int.
__SM_32_INTRINSICS_DECL__ int __ldca(const int *ptr) { unsigned int ret; asm volatile ("ld.global.ca.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
|
| 204 |
+
// Loads one long long from ptr via PTX ld.global.ca.s64 into an unsigned long long temporary, then casts it to long long.
__SM_32_INTRINSICS_DECL__ long long __ldca(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.ca.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
|
| 205 |
+
// Loads a char2 from ptr with one PTX ld.global.ca.v2.s8; both components land in an int2 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char2 __ldca(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.ca.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
|
| 206 |
+
// Loads a char4 from ptr with one PTX ld.global.ca.v4.s8; the four components land in an int4 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char4 __ldca(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.ca.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
|
| 207 |
+
// Loads a short2 from ptr with one PTX ld.global.ca.v2.s16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short2 __ldca(const short2 *ptr) { short2 ret; asm volatile ("ld.global.ca.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 208 |
+
// Loads a short4 from ptr with one PTX ld.global.ca.v4.s16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short4 __ldca(const short4 *ptr) { short4 ret; asm volatile ("ld.global.ca.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 209 |
+
// Loads an int2 from ptr with one PTX ld.global.ca.v2.s32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int2 __ldca(const int2 *ptr) { int2 ret; asm volatile ("ld.global.ca.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 210 |
+
// Loads an int4 from ptr with one PTX ld.global.ca.v4.s32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int4 __ldca(const int4 *ptr) { int4 ret; asm volatile ("ld.global.ca.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 211 |
+
// Loads a longlong2 from ptr with one PTX ld.global.ca.v2.s64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ longlong2 __ldca(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.ca.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 212 |
+
|
| 213 |
+
// Loads one unsigned char from ptr via PTX ld.global.ca.u8; the asm writes an unsigned int temporary that is cast down to unsigned char.
__SM_32_INTRINSICS_DECL__ unsigned char __ldca(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.ca.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
|
| 214 |
+
// Loads one unsigned short from ptr via PTX ld.global.ca.u16 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned short __ldca(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.ca.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 215 |
+
// Loads one unsigned int from ptr via PTX ld.global.ca.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned int __ldca(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.ca.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 216 |
+
// Loads one unsigned long long from ptr via PTX ld.global.ca.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long long __ldca(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.ca.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 217 |
+
// Loads a uchar2 from ptr with one PTX ld.global.ca.v2.u8; both components land in a uint2 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar2 __ldca(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.ca.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
|
| 218 |
+
// Loads a uchar4 from ptr with one PTX ld.global.ca.v4.u8; the four components land in a uint4 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar4 __ldca(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.ca.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
|
| 219 |
+
// Loads a ushort2 from ptr with one PTX ld.global.ca.v2.u16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort2 __ldca(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.ca.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 220 |
+
// Loads a ushort4 from ptr with one PTX ld.global.ca.v4.u16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort4 __ldca(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.ca.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 221 |
+
// Loads a uint2 from ptr with one PTX ld.global.ca.v2.u32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint2 __ldca(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.ca.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 222 |
+
// Loads a uint4 from ptr with one PTX ld.global.ca.v4.u32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint4 __ldca(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.ca.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 223 |
+
// Loads a ulonglong2 from ptr with one PTX ld.global.ca.v2.u64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ulonglong2 __ldca(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.ca.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 224 |
+
|
| 225 |
+
// Loads one float from ptr via PTX ld.global.ca.f32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ float __ldca(const float *ptr) { float ret; asm volatile ("ld.global.ca.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 226 |
+
// Loads one double from ptr via PTX ld.global.ca.f64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ double __ldca(const double *ptr) { double ret; asm volatile ("ld.global.ca.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 227 |
+
// Loads a float2 from ptr with one PTX ld.global.ca.v2.f32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float2 __ldca(const float2 *ptr) { float2 ret; asm volatile ("ld.global.ca.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 228 |
+
// Loads a float4 from ptr with one PTX ld.global.ca.v4.f32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float4 __ldca(const float4 *ptr) { float4 ret; asm volatile ("ld.global.ca.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 229 |
+
// Loads a double2 from ptr with one PTX ld.global.ca.v2.f64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ double2 __ldca(const double2 *ptr) { double2 ret; asm volatile ("ld.global.ca.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 230 |
+
|
| 231 |
+
/******************************************************************************
|
| 232 |
+
* __ldcs *
|
| 233 |
+
******************************************************************************/
|
| 234 |
+
|
| 235 |
+
// Size of long is architecture and OS specific.
|
| 236 |
+
#if defined(__LP64__) // 64 bits
|
| 237 |
+
// __LP64__ branch: loads a long from ptr as a 64-bit value via PTX ld.global.cs.s64 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldcs(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 238 |
+
// __LP64__ branch: loads an unsigned long from ptr as a 64-bit value via PTX ld.global.cs.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldcs(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 239 |
+
#else // 32 bits
|
| 240 |
+
// Non-__LP64__ branch: loads a long from ptr as a 32-bit value via PTX ld.global.cs.s32 into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldcs(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
|
| 241 |
+
// Non-__LP64__ branch: loads an unsigned long from ptr as a 32-bit value via PTX ld.global.cs.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldcs(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 242 |
+
#endif
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
// Loads one char from ptr via PTX ld.global.cs.s8; the asm writes an unsigned int temporary that is cast down to char.
__SM_32_INTRINSICS_DECL__ char __ldcs(const char *ptr) { unsigned int ret; asm volatile ("ld.global.cs.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
|
| 246 |
+
// Loads one signed char from ptr via PTX ld.global.cs.s8; the asm writes an unsigned int temporary that is cast down to signed char.
__SM_32_INTRINSICS_DECL__ signed char __ldcs(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.cs.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
|
| 247 |
+
// Loads one short from ptr via PTX ld.global.cs.s16 into an unsigned short temporary, then casts it to short.
__SM_32_INTRINSICS_DECL__ short __ldcs(const short *ptr) { unsigned short ret; asm volatile ("ld.global.cs.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
|
| 248 |
+
// Loads one int from ptr via PTX ld.global.cs.s32 into an unsigned int temporary, then casts it to int.
__SM_32_INTRINSICS_DECL__ int __ldcs(const int *ptr) { unsigned int ret; asm volatile ("ld.global.cs.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
|
| 249 |
+
// Loads one long long from ptr via PTX ld.global.cs.s64 into an unsigned long long temporary, then casts it to long long.
__SM_32_INTRINSICS_DECL__ long long __ldcs(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cs.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
|
| 250 |
+
// Loads a char2 from ptr with one PTX ld.global.cs.v2.s8; both components land in an int2 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char2 __ldcs(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.cs.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
|
| 251 |
+
// Loads a char4 from ptr with one PTX ld.global.cs.v4.s8; the four components land in an int4 temporary and are cast to char one by one.
__SM_32_INTRINSICS_DECL__ char4 __ldcs(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.cs.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
|
| 252 |
+
// Loads a short2 from ptr with one PTX ld.global.cs.v2.s16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short2 __ldcs(const short2 *ptr) { short2 ret; asm volatile ("ld.global.cs.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 253 |
+
// Loads a short4 from ptr with one PTX ld.global.cs.v4.s16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ short4 __ldcs(const short4 *ptr) { short4 ret; asm volatile ("ld.global.cs.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 254 |
+
// Loads an int2 from ptr with one PTX ld.global.cs.v2.s32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int2 __ldcs(const int2 *ptr) { int2 ret; asm volatile ("ld.global.cs.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 255 |
+
// Loads an int4 from ptr with one PTX ld.global.cs.v4.s32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ int4 __ldcs(const int4 *ptr) { int4 ret; asm volatile ("ld.global.cs.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 256 |
+
// Loads a longlong2 from ptr with one PTX ld.global.cs.v2.s64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ longlong2 __ldcs(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.cs.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 257 |
+
|
| 258 |
+
// Loads one unsigned char from ptr via PTX ld.global.cs.u8; the asm writes an unsigned int temporary that is cast down to unsigned char.
__SM_32_INTRINSICS_DECL__ unsigned char __ldcs(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.cs.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
|
| 259 |
+
// Loads one unsigned short from ptr via PTX ld.global.cs.u16 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned short __ldcs(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.cs.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 260 |
+
// Loads one unsigned int from ptr via PTX ld.global.cs.u32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned int __ldcs(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.cs.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 261 |
+
// Loads one unsigned long long from ptr via PTX ld.global.cs.u64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long long __ldcs(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cs.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 262 |
+
// Loads a uchar2 from ptr with one PTX ld.global.cs.v2.u8; both components land in a uint2 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar2 __ldcs(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.cs.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
|
| 263 |
+
// Loads a uchar4 from ptr with one PTX ld.global.cs.v4.u8; the four components land in a uint4 temporary and are cast to unsigned char one by one.
__SM_32_INTRINSICS_DECL__ uchar4 __ldcs(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.cs.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
|
| 264 |
+
// Loads a ushort2 from ptr with one PTX ld.global.cs.v2.u16, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort2 __ldcs(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.cs.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 265 |
+
// Loads a ushort4 from ptr with one PTX ld.global.cs.v4.u16, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ushort4 __ldcs(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.cs.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 266 |
+
// Loads a uint2 from ptr with one PTX ld.global.cs.v2.u32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint2 __ldcs(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.cs.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 267 |
+
// Loads a uint4 from ptr with one PTX ld.global.cs.v4.u32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ uint4 __ldcs(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.cs.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 268 |
+
// Loads a ulonglong2 from ptr with one PTX ld.global.cs.v2.u64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ ulonglong2 __ldcs(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.cs.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 269 |
+
|
| 270 |
+
// Loads one float from ptr via PTX ld.global.cs.f32 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ float __ldcs(const float *ptr) { float ret; asm volatile ("ld.global.cs.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 271 |
+
// Loads one double from ptr via PTX ld.global.cs.f64 and returns it unchanged.
__SM_32_INTRINSICS_DECL__ double __ldcs(const double *ptr) { double ret; asm volatile ("ld.global.cs.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
|
| 272 |
+
// Loads a float2 from ptr with one PTX ld.global.cs.v2.f32, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float2 __ldcs(const float2 *ptr) { float2 ret; asm volatile ("ld.global.cs.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 273 |
+
// Loads a float4 from ptr with one PTX ld.global.cs.v4.f32, writing ret.x/.y/.z/.w directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ float4 __ldcs(const float4 *ptr) { float4 ret; asm volatile ("ld.global.cs.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
|
| 274 |
+
// Loads a double2 from ptr with one PTX ld.global.cs.v2.f64, writing ret.x and ret.y directly from the asm outputs.
__SM_32_INTRINSICS_DECL__ double2 __ldcs(const double2 *ptr) { double2 ret; asm volatile ("ld.global.cs.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
|
| 275 |
+
|
| 276 |
+
/******************************************************************************
|
| 277 |
+
* __ldlu *
|
| 278 |
+
******************************************************************************/
|
| 279 |
+
|
| 280 |
+
// Size of long is architecture and OS specific.
|
| 281 |
+
#if defined(__LP64__) // 64 bits
|
| 282 |
+
// __LP64__ branch: loads a long from ptr as a 64-bit value via PTX ld.global.lu.s64 (non-volatile asm with a "memory" clobber) into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldlu(const long *ptr) { unsigned long ret; asm ("ld.global.lu.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return (long)ret; }
|
| 283 |
+
// __LP64__ branch: loads an unsigned long from ptr as a 64-bit value via PTX ld.global.lu.u64 (non-volatile asm with a "memory" clobber) and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldlu(const unsigned long *ptr) { unsigned long ret; asm ("ld.global.lu.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
|
| 284 |
+
#else // 32 bits
|
| 285 |
+
// Non-__LP64__ branch: loads a long from ptr as a 32-bit value via PTX ld.global.lu.s32 (non-volatile asm with a "memory" clobber) into an unsigned long temporary, then casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldlu(const long *ptr) { unsigned long ret; asm ("ld.global.lu.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (long)ret; }
|
| 286 |
+
// Non-__LP64__ branch: loads an unsigned long from ptr as a 32-bit value via PTX ld.global.lu.u32 (non-volatile asm with a "memory" clobber) and returns it unchanged.
__SM_32_INTRINSICS_DECL__ unsigned long __ldlu(const unsigned long *ptr) { unsigned long ret; asm ("ld.global.lu.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
|
| 287 |
+
#endif
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
// Reads ptr via PTX ld.global.lu.s8 into an unsigned int and casts it to char.
__SM_32_INTRINSICS_DECL__ char __ldlu(const char *ptr)
{
    unsigned int raw;
    asm ("ld.global.lu.s8 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (char)raw;
}
|
| 291 |
+
// Reads ptr via PTX ld.global.lu.s8 into an unsigned int and casts it to signed char.
__SM_32_INTRINSICS_DECL__ signed char __ldlu(const signed char *ptr)
{
    unsigned int raw;
    asm ("ld.global.lu.s8 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (signed char)raw;
}
|
| 292 |
+
// Reads ptr via PTX ld.global.lu.s16 into an unsigned short and casts it to short.
__SM_32_INTRINSICS_DECL__ short __ldlu(const short *ptr)
{
    unsigned short raw;
    asm ("ld.global.lu.s16 %0, [%1];" : "=h"(raw) : __LDG_PTR (ptr) : "memory");
    return (short)raw;
}
|
| 293 |
+
// Reads ptr via PTX ld.global.lu.s32 into an unsigned int and casts it to int.
__SM_32_INTRINSICS_DECL__ int __ldlu(const int *ptr)
{
    unsigned int raw;
    asm ("ld.global.lu.s32 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (int)raw;
}
|
| 294 |
+
// Reads ptr via PTX ld.global.lu.s64 into an unsigned long long and casts it to long long.
__SM_32_INTRINSICS_DECL__ long long __ldlu(const long long *ptr)
{
    unsigned long long raw;
    asm ("ld.global.lu.s64 %0, [%1];" : "=l"(raw) : __LDG_PTR (ptr) : "memory");
    return (long long)raw;
}
|
| 295 |
+
// Reads ptr via PTX ld.global.lu.v2.s8 into int lanes, then narrows each lane to char.
__SM_32_INTRINSICS_DECL__ char2 __ldlu(const char2 *ptr)
{
    int2 wide;
    asm ("ld.global.lu.v2.s8 {%0,%1}, [%2];"
         : "=r"(wide.x), "=r"(wide.y)
         : __LDG_PTR (ptr)
         : "memory");
    char2 narrow;
    narrow.x = (char)wide.x;
    narrow.y = (char)wide.y;
    return narrow;
}
|
| 296 |
+
// Reads ptr via PTX ld.global.lu.v4.s8 into int lanes, then narrows each lane to char.
__SM_32_INTRINSICS_DECL__ char4 __ldlu(const char4 *ptr)
{
    int4 wide;
    asm ("ld.global.lu.v4.s8 {%0,%1,%2,%3}, [%4];"
         : "=r"(wide.x), "=r"(wide.y), "=r"(wide.z), "=r"(wide.w)
         : __LDG_PTR (ptr)
         : "memory");
    char4 narrow;
    narrow.x = (char)wide.x;
    narrow.y = (char)wide.y;
    narrow.z = (char)wide.z;
    narrow.w = (char)wide.w;
    return narrow;
}
|
| 297 |
+
// Returns the short2 read from ptr by PTX ld.global.lu.v2.s16.
__SM_32_INTRINSICS_DECL__ short2 __ldlu(const short2 *ptr)
{
    short2 loaded;
    asm ("ld.global.lu.v2.s16 {%0,%1}, [%2];"
         : "=h"(loaded.x), "=h"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 298 |
+
// Returns the short4 read from ptr by PTX ld.global.lu.v4.s16.
__SM_32_INTRINSICS_DECL__ short4 __ldlu(const short4 *ptr)
{
    short4 loaded;
    asm ("ld.global.lu.v4.s16 {%0,%1,%2,%3}, [%4];"
         : "=h"(loaded.x), "=h"(loaded.y), "=h"(loaded.z), "=h"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 299 |
+
// Returns the int2 read from ptr by PTX ld.global.lu.v2.s32.
__SM_32_INTRINSICS_DECL__ int2 __ldlu(const int2 *ptr)
{
    int2 loaded;
    asm ("ld.global.lu.v2.s32 {%0,%1}, [%2];"
         : "=r"(loaded.x), "=r"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 300 |
+
// Returns the int4 read from ptr by PTX ld.global.lu.v4.s32.
__SM_32_INTRINSICS_DECL__ int4 __ldlu(const int4 *ptr)
{
    int4 loaded;
    asm ("ld.global.lu.v4.s32 {%0,%1,%2,%3}, [%4];"
         : "=r"(loaded.x), "=r"(loaded.y), "=r"(loaded.z), "=r"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 301 |
+
// Returns the longlong2 read from ptr by PTX ld.global.lu.v2.s64.
__SM_32_INTRINSICS_DECL__ longlong2 __ldlu(const longlong2 *ptr)
{
    longlong2 loaded;
    asm ("ld.global.lu.v2.s64 {%0,%1}, [%2];"
         : "=l"(loaded.x), "=l"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 302 |
+
|
| 303 |
+
// Reads ptr via PTX ld.global.lu.u8 into an unsigned int and casts it to unsigned char.
__SM_32_INTRINSICS_DECL__ unsigned char __ldlu(const unsigned char *ptr)
{
    unsigned int raw;
    asm ("ld.global.lu.u8 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (unsigned char)raw;
}
|
| 304 |
+
// Returns the unsigned short read from ptr by PTX ld.global.lu.u16.
__SM_32_INTRINSICS_DECL__ unsigned short __ldlu(const unsigned short *ptr)
{
    unsigned short loaded;
    asm ("ld.global.lu.u16 %0, [%1];" : "=h"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 305 |
+
// Returns the unsigned int read from ptr by PTX ld.global.lu.u32.
__SM_32_INTRINSICS_DECL__ unsigned int __ldlu(const unsigned int *ptr)
{
    unsigned int loaded;
    asm ("ld.global.lu.u32 %0, [%1];" : "=r"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 306 |
+
// Returns the unsigned long long read from ptr by PTX ld.global.lu.u64.
__SM_32_INTRINSICS_DECL__ unsigned long long __ldlu(const unsigned long long *ptr)
{
    unsigned long long loaded;
    asm ("ld.global.lu.u64 %0, [%1];" : "=l"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 307 |
+
// Reads ptr via PTX ld.global.lu.v2.u8 into unsigned int lanes, then narrows each lane to unsigned char.
__SM_32_INTRINSICS_DECL__ uchar2 __ldlu(const uchar2 *ptr)
{
    uint2 wide;
    asm ("ld.global.lu.v2.u8 {%0,%1}, [%2];"
         : "=r"(wide.x), "=r"(wide.y)
         : __LDG_PTR (ptr)
         : "memory");
    uchar2 narrow;
    narrow.x = (unsigned char)wide.x;
    narrow.y = (unsigned char)wide.y;
    return narrow;
}
|
| 308 |
+
// Reads ptr via PTX ld.global.lu.v4.u8 into unsigned int lanes, then narrows each lane to unsigned char.
__SM_32_INTRINSICS_DECL__ uchar4 __ldlu(const uchar4 *ptr)
{
    uint4 wide;
    asm ("ld.global.lu.v4.u8 {%0,%1,%2,%3}, [%4];"
         : "=r"(wide.x), "=r"(wide.y), "=r"(wide.z), "=r"(wide.w)
         : __LDG_PTR (ptr)
         : "memory");
    uchar4 narrow;
    narrow.x = (unsigned char)wide.x;
    narrow.y = (unsigned char)wide.y;
    narrow.z = (unsigned char)wide.z;
    narrow.w = (unsigned char)wide.w;
    return narrow;
}
|
| 309 |
+
// Returns the ushort2 read from ptr by PTX ld.global.lu.v2.u16.
__SM_32_INTRINSICS_DECL__ ushort2 __ldlu(const ushort2 *ptr)
{
    ushort2 loaded;
    asm ("ld.global.lu.v2.u16 {%0,%1}, [%2];"
         : "=h"(loaded.x), "=h"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 310 |
+
// Returns the ushort4 read from ptr by PTX ld.global.lu.v4.u16.
__SM_32_INTRINSICS_DECL__ ushort4 __ldlu(const ushort4 *ptr)
{
    ushort4 loaded;
    asm ("ld.global.lu.v4.u16 {%0,%1,%2,%3}, [%4];"
         : "=h"(loaded.x), "=h"(loaded.y), "=h"(loaded.z), "=h"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 311 |
+
// Returns the uint2 read from ptr by PTX ld.global.lu.v2.u32.
__SM_32_INTRINSICS_DECL__ uint2 __ldlu(const uint2 *ptr)
{
    uint2 loaded;
    asm ("ld.global.lu.v2.u32 {%0,%1}, [%2];"
         : "=r"(loaded.x), "=r"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 312 |
+
// Returns the uint4 read from ptr by PTX ld.global.lu.v4.u32.
__SM_32_INTRINSICS_DECL__ uint4 __ldlu(const uint4 *ptr)
{
    uint4 loaded;
    asm ("ld.global.lu.v4.u32 {%0,%1,%2,%3}, [%4];"
         : "=r"(loaded.x), "=r"(loaded.y), "=r"(loaded.z), "=r"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 313 |
+
// Returns the ulonglong2 read from ptr by PTX ld.global.lu.v2.u64.
__SM_32_INTRINSICS_DECL__ ulonglong2 __ldlu(const ulonglong2 *ptr)
{
    ulonglong2 loaded;
    asm ("ld.global.lu.v2.u64 {%0,%1}, [%2];"
         : "=l"(loaded.x), "=l"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 314 |
+
|
| 315 |
+
// Returns the float read from ptr by PTX ld.global.lu.f32.
__SM_32_INTRINSICS_DECL__ float __ldlu(const float *ptr)
{
    float loaded;
    asm ("ld.global.lu.f32 %0, [%1];" : "=f"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 316 |
+
// Returns the double read from ptr by PTX ld.global.lu.f64.
__SM_32_INTRINSICS_DECL__ double __ldlu(const double *ptr)
{
    double loaded;
    asm ("ld.global.lu.f64 %0, [%1];" : "=d"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 317 |
+
// Returns the float2 read from ptr by PTX ld.global.lu.v2.f32.
__SM_32_INTRINSICS_DECL__ float2 __ldlu(const float2 *ptr)
{
    float2 loaded;
    asm ("ld.global.lu.v2.f32 {%0,%1}, [%2];"
         : "=f"(loaded.x), "=f"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 318 |
+
// Returns the float4 read from ptr by PTX ld.global.lu.v4.f32.
__SM_32_INTRINSICS_DECL__ float4 __ldlu(const float4 *ptr)
{
    float4 loaded;
    asm ("ld.global.lu.v4.f32 {%0,%1,%2,%3}, [%4];"
         : "=f"(loaded.x), "=f"(loaded.y), "=f"(loaded.z), "=f"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 319 |
+
// Returns the double2 read from ptr by PTX ld.global.lu.v2.f64.
__SM_32_INTRINSICS_DECL__ double2 __ldlu(const double2 *ptr)
{
    double2 loaded;
    asm ("ld.global.lu.v2.f64 {%0,%1}, [%2];"
         : "=d"(loaded.x), "=d"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 320 |
+
|
| 321 |
+
/******************************************************************************
|
| 322 |
+
* __ldcv *
|
| 323 |
+
******************************************************************************/
|
| 324 |
+
|
| 325 |
+
// Size of long is architecture and OS specific.
|
| 326 |
+
#if defined(__LP64__) // 64 bits
|
| 327 |
+
// __LP64__ build: reads ptr via PTX ld.global.cv.s64 into an unsigned long and casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldcv(const long *ptr)
{
    unsigned long raw;
    asm ("ld.global.cv.s64 %0, [%1];" : "=l"(raw) : __LDG_PTR (ptr) : "memory");
    return (long)raw;
}
|
| 328 |
+
// __LP64__ build: returns the unsigned long read from ptr by PTX ld.global.cv.u64.
__SM_32_INTRINSICS_DECL__ unsigned long __ldcv(const unsigned long *ptr)
{
    unsigned long loaded;
    asm ("ld.global.cv.u64 %0, [%1];" : "=l"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 329 |
+
#else // 32 bits
|
| 330 |
+
// Non-__LP64__ build: reads ptr via PTX ld.global.cv.s32 into an unsigned long and casts it to long.
__SM_32_INTRINSICS_DECL__ long __ldcv(const long *ptr)
{
    unsigned long raw;
    asm ("ld.global.cv.s32 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (long)raw;
}
|
| 331 |
+
// Non-__LP64__ build: returns the unsigned long read from ptr by PTX ld.global.cv.u32.
__SM_32_INTRINSICS_DECL__ unsigned long __ldcv(const unsigned long *ptr)
{
    unsigned long loaded;
    asm ("ld.global.cv.u32 %0, [%1];" : "=r"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 332 |
+
#endif
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
// Reads ptr via PTX ld.global.cv.s8 into an unsigned int and casts it to char.
__SM_32_INTRINSICS_DECL__ char __ldcv(const char *ptr)
{
    unsigned int raw;
    asm ("ld.global.cv.s8 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (char)raw;
}
|
| 336 |
+
// Reads ptr via PTX ld.global.cv.s8 into an unsigned int and casts it to signed char.
__SM_32_INTRINSICS_DECL__ signed char __ldcv(const signed char *ptr)
{
    unsigned int raw;
    asm ("ld.global.cv.s8 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (signed char)raw;
}
|
| 337 |
+
// Reads ptr via PTX ld.global.cv.s16 into an unsigned short and casts it to short.
__SM_32_INTRINSICS_DECL__ short __ldcv(const short *ptr)
{
    unsigned short raw;
    asm ("ld.global.cv.s16 %0, [%1];" : "=h"(raw) : __LDG_PTR (ptr) : "memory");
    return (short)raw;
}
|
| 338 |
+
// Reads ptr via PTX ld.global.cv.s32 into an unsigned int and casts it to int.
__SM_32_INTRINSICS_DECL__ int __ldcv(const int *ptr)
{
    unsigned int raw;
    asm ("ld.global.cv.s32 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (int)raw;
}
|
| 339 |
+
// Reads ptr via PTX ld.global.cv.s64 into an unsigned long long and casts it to long long.
__SM_32_INTRINSICS_DECL__ long long __ldcv(const long long *ptr)
{
    unsigned long long raw;
    asm ("ld.global.cv.s64 %0, [%1];" : "=l"(raw) : __LDG_PTR (ptr) : "memory");
    return (long long)raw;
}
|
| 340 |
+
// Reads ptr via PTX ld.global.cv.v2.s8 into int lanes, then narrows each lane to char.
__SM_32_INTRINSICS_DECL__ char2 __ldcv(const char2 *ptr)
{
    int2 wide;
    asm ("ld.global.cv.v2.s8 {%0,%1}, [%2];"
         : "=r"(wide.x), "=r"(wide.y)
         : __LDG_PTR (ptr)
         : "memory");
    char2 narrow;
    narrow.x = (char)wide.x;
    narrow.y = (char)wide.y;
    return narrow;
}
|
| 341 |
+
// Reads ptr via PTX ld.global.cv.v4.s8 into int lanes, then narrows each lane to char.
__SM_32_INTRINSICS_DECL__ char4 __ldcv(const char4 *ptr)
{
    int4 wide;
    asm ("ld.global.cv.v4.s8 {%0,%1,%2,%3}, [%4];"
         : "=r"(wide.x), "=r"(wide.y), "=r"(wide.z), "=r"(wide.w)
         : __LDG_PTR (ptr)
         : "memory");
    char4 narrow;
    narrow.x = (char)wide.x;
    narrow.y = (char)wide.y;
    narrow.z = (char)wide.z;
    narrow.w = (char)wide.w;
    return narrow;
}
|
| 342 |
+
// Returns the short2 read from ptr by PTX ld.global.cv.v2.s16.
__SM_32_INTRINSICS_DECL__ short2 __ldcv(const short2 *ptr)
{
    short2 loaded;
    asm ("ld.global.cv.v2.s16 {%0,%1}, [%2];"
         : "=h"(loaded.x), "=h"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 343 |
+
// Returns the short4 read from ptr by PTX ld.global.cv.v4.s16.
__SM_32_INTRINSICS_DECL__ short4 __ldcv(const short4 *ptr)
{
    short4 loaded;
    asm ("ld.global.cv.v4.s16 {%0,%1,%2,%3}, [%4];"
         : "=h"(loaded.x), "=h"(loaded.y), "=h"(loaded.z), "=h"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 344 |
+
// Returns the int2 read from ptr by PTX ld.global.cv.v2.s32.
__SM_32_INTRINSICS_DECL__ int2 __ldcv(const int2 *ptr)
{
    int2 loaded;
    asm ("ld.global.cv.v2.s32 {%0,%1}, [%2];"
         : "=r"(loaded.x), "=r"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 345 |
+
// Returns the int4 read from ptr by PTX ld.global.cv.v4.s32.
__SM_32_INTRINSICS_DECL__ int4 __ldcv(const int4 *ptr)
{
    int4 loaded;
    asm ("ld.global.cv.v4.s32 {%0,%1,%2,%3}, [%4];"
         : "=r"(loaded.x), "=r"(loaded.y), "=r"(loaded.z), "=r"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 346 |
+
// Returns the longlong2 read from ptr by PTX ld.global.cv.v2.s64.
__SM_32_INTRINSICS_DECL__ longlong2 __ldcv(const longlong2 *ptr)
{
    longlong2 loaded;
    asm ("ld.global.cv.v2.s64 {%0,%1}, [%2];"
         : "=l"(loaded.x), "=l"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 347 |
+
|
| 348 |
+
// Reads ptr via PTX ld.global.cv.u8 into an unsigned int and casts it to unsigned char.
__SM_32_INTRINSICS_DECL__ unsigned char __ldcv(const unsigned char *ptr)
{
    unsigned int raw;
    asm ("ld.global.cv.u8 %0, [%1];" : "=r"(raw) : __LDG_PTR (ptr) : "memory");
    return (unsigned char)raw;
}
|
| 349 |
+
// Returns the unsigned short read from ptr by PTX ld.global.cv.u16.
__SM_32_INTRINSICS_DECL__ unsigned short __ldcv(const unsigned short *ptr)
{
    unsigned short loaded;
    asm ("ld.global.cv.u16 %0, [%1];" : "=h"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 350 |
+
// Returns the unsigned int read from ptr by PTX ld.global.cv.u32.
__SM_32_INTRINSICS_DECL__ unsigned int __ldcv(const unsigned int *ptr)
{
    unsigned int loaded;
    asm ("ld.global.cv.u32 %0, [%1];" : "=r"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 351 |
+
// Returns the unsigned long long read from ptr by PTX ld.global.cv.u64.
__SM_32_INTRINSICS_DECL__ unsigned long long __ldcv(const unsigned long long *ptr)
{
    unsigned long long loaded;
    asm ("ld.global.cv.u64 %0, [%1];" : "=l"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 352 |
+
// Reads ptr via PTX ld.global.cv.v2.u8 into unsigned int lanes, then narrows each lane to unsigned char.
__SM_32_INTRINSICS_DECL__ uchar2 __ldcv(const uchar2 *ptr)
{
    uint2 wide;
    asm ("ld.global.cv.v2.u8 {%0,%1}, [%2];"
         : "=r"(wide.x), "=r"(wide.y)
         : __LDG_PTR (ptr)
         : "memory");
    uchar2 narrow;
    narrow.x = (unsigned char)wide.x;
    narrow.y = (unsigned char)wide.y;
    return narrow;
}
|
| 353 |
+
// Reads ptr via PTX ld.global.cv.v4.u8 into unsigned int lanes, then narrows each lane to unsigned char.
__SM_32_INTRINSICS_DECL__ uchar4 __ldcv(const uchar4 *ptr)
{
    uint4 wide;
    asm ("ld.global.cv.v4.u8 {%0,%1,%2,%3}, [%4];"
         : "=r"(wide.x), "=r"(wide.y), "=r"(wide.z), "=r"(wide.w)
         : __LDG_PTR (ptr)
         : "memory");
    uchar4 narrow;
    narrow.x = (unsigned char)wide.x;
    narrow.y = (unsigned char)wide.y;
    narrow.z = (unsigned char)wide.z;
    narrow.w = (unsigned char)wide.w;
    return narrow;
}
|
| 354 |
+
// Returns the ushort2 read from ptr by PTX ld.global.cv.v2.u16.
__SM_32_INTRINSICS_DECL__ ushort2 __ldcv(const ushort2 *ptr)
{
    ushort2 loaded;
    asm ("ld.global.cv.v2.u16 {%0,%1}, [%2];"
         : "=h"(loaded.x), "=h"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 355 |
+
// Returns the ushort4 read from ptr by PTX ld.global.cv.v4.u16.
__SM_32_INTRINSICS_DECL__ ushort4 __ldcv(const ushort4 *ptr)
{
    ushort4 loaded;
    asm ("ld.global.cv.v4.u16 {%0,%1,%2,%3}, [%4];"
         : "=h"(loaded.x), "=h"(loaded.y), "=h"(loaded.z), "=h"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 356 |
+
// Returns the uint2 read from ptr by PTX ld.global.cv.v2.u32.
__SM_32_INTRINSICS_DECL__ uint2 __ldcv(const uint2 *ptr)
{
    uint2 loaded;
    asm ("ld.global.cv.v2.u32 {%0,%1}, [%2];"
         : "=r"(loaded.x), "=r"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 357 |
+
// Returns the uint4 read from ptr by PTX ld.global.cv.v4.u32.
__SM_32_INTRINSICS_DECL__ uint4 __ldcv(const uint4 *ptr)
{
    uint4 loaded;
    asm ("ld.global.cv.v4.u32 {%0,%1,%2,%3}, [%4];"
         : "=r"(loaded.x), "=r"(loaded.y), "=r"(loaded.z), "=r"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 358 |
+
// Returns the ulonglong2 read from ptr by PTX ld.global.cv.v2.u64.
__SM_32_INTRINSICS_DECL__ ulonglong2 __ldcv(const ulonglong2 *ptr)
{
    ulonglong2 loaded;
    asm ("ld.global.cv.v2.u64 {%0,%1}, [%2];"
         : "=l"(loaded.x), "=l"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 359 |
+
|
| 360 |
+
// Returns the float read from ptr by PTX ld.global.cv.f32.
__SM_32_INTRINSICS_DECL__ float __ldcv(const float *ptr)
{
    float loaded;
    asm ("ld.global.cv.f32 %0, [%1];" : "=f"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 361 |
+
// Returns the double read from ptr by PTX ld.global.cv.f64.
__SM_32_INTRINSICS_DECL__ double __ldcv(const double *ptr)
{
    double loaded;
    asm ("ld.global.cv.f64 %0, [%1];" : "=d"(loaded) : __LDG_PTR (ptr) : "memory");
    return loaded;
}
|
| 362 |
+
// Returns the float2 read from ptr by PTX ld.global.cv.v2.f32.
__SM_32_INTRINSICS_DECL__ float2 __ldcv(const float2 *ptr)
{
    float2 loaded;
    asm ("ld.global.cv.v2.f32 {%0,%1}, [%2];"
         : "=f"(loaded.x), "=f"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 363 |
+
// Returns the float4 read from ptr by PTX ld.global.cv.v4.f32.
__SM_32_INTRINSICS_DECL__ float4 __ldcv(const float4 *ptr)
{
    float4 loaded;
    asm ("ld.global.cv.v4.f32 {%0,%1,%2,%3}, [%4];"
         : "=f"(loaded.x), "=f"(loaded.y), "=f"(loaded.z), "=f"(loaded.w)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 364 |
+
// Returns the double2 read from ptr by PTX ld.global.cv.v2.f64.
__SM_32_INTRINSICS_DECL__ double2 __ldcv(const double2 *ptr)
{
    double2 loaded;
    asm ("ld.global.cv.v2.f64 {%0,%1}, [%2];"
         : "=d"(loaded.x), "=d"(loaded.y)
         : __LDG_PTR (ptr)
         : "memory");
    return loaded;
}
|
| 365 |
+
|
| 366 |
+
/******************************************************************************
|
| 367 |
+
* __stwb *
|
| 368 |
+
******************************************************************************/
|
| 369 |
+
|
| 370 |
+
// Size of long is architecture and OS specific.
|
| 371 |
+
#if defined(__LP64__) // 64 bits
|
| 372 |
+
// __LP64__ build: writes value to ptr with PTX st.global.wb.s64.
__SM_32_INTRINSICS_DECL__ void __stwb(long *ptr, long value)
{
    asm ("st.global.wb.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 373 |
+
// __LP64__ build: writes value to ptr with PTX st.global.wb.u64.
__SM_32_INTRINSICS_DECL__ void __stwb(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.wb.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 374 |
+
#else // 32 bits
|
| 375 |
+
// Non-__LP64__ build: writes value to ptr with PTX st.global.wb.s32.
__SM_32_INTRINSICS_DECL__ void __stwb(long *ptr, long value)
{
    asm ("st.global.wb.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 376 |
+
// Non-__LP64__ build: writes value to ptr with PTX st.global.wb.u32.
__SM_32_INTRINSICS_DECL__ void __stwb(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.wb.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 377 |
+
#endif
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.wb.s8.
__SM_32_INTRINSICS_DECL__ void __stwb(char *ptr, char value)
{
    asm ("st.global.wb.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 381 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.wb.s8.
__SM_32_INTRINSICS_DECL__ void __stwb(signed char *ptr, signed char value)
{
    asm ("st.global.wb.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 382 |
+
// Writes value to ptr with PTX st.global.wb.s16.
__SM_32_INTRINSICS_DECL__ void __stwb(short *ptr, short value)
{
    asm ("st.global.wb.s16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 383 |
+
// Writes value to ptr with PTX st.global.wb.s32.
__SM_32_INTRINSICS_DECL__ void __stwb(int *ptr, int value)
{
    asm ("st.global.wb.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 384 |
+
// Writes value to ptr with PTX st.global.wb.s64.
__SM_32_INTRINSICS_DECL__ void __stwb(long long *ptr, long long value)
{
    asm ("st.global.wb.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 385 |
+
// Copies both lanes of value into ints, then writes them to ptr with PTX st.global.wb.v2.s8.
__SM_32_INTRINSICS_DECL__ void __stwb(char2 *ptr, char2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.wb.v2.s8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 386 |
+
// Copies all four lanes of value into ints, then writes them to ptr with PTX st.global.wb.v4.s8.
__SM_32_INTRINSICS_DECL__ void __stwb(char4 *ptr, char4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.wb.v4.s8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 387 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.s16.
__SM_32_INTRINSICS_DECL__ void __stwb(short2 *ptr, short2 value)
{
    asm ("st.global.wb.v2.s16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 388 |
+
// Writes all four lanes of value to ptr with PTX st.global.wb.v4.s16.
__SM_32_INTRINSICS_DECL__ void __stwb(short4 *ptr, short4 value)
{
    asm ("st.global.wb.v4.s16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 389 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.s32.
__SM_32_INTRINSICS_DECL__ void __stwb(int2 *ptr, int2 value)
{
    asm ("st.global.wb.v2.s32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 390 |
+
// Writes all four lanes of value to ptr with PTX st.global.wb.v4.s32.
__SM_32_INTRINSICS_DECL__ void __stwb(int4 *ptr, int4 value)
{
    asm ("st.global.wb.v4.s32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 391 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.s64.
__SM_32_INTRINSICS_DECL__ void __stwb(longlong2 *ptr, longlong2 value)
{
    asm ("st.global.wb.v2.s64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 392 |
+
|
| 393 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.wb.u8.
__SM_32_INTRINSICS_DECL__ void __stwb(unsigned char *ptr, unsigned char value)
{
    asm ("st.global.wb.u8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 394 |
+
// Writes value to ptr with PTX st.global.wb.u16.
__SM_32_INTRINSICS_DECL__ void __stwb(unsigned short *ptr, unsigned short value)
{
    asm ("st.global.wb.u16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 395 |
+
// Writes value to ptr with PTX st.global.wb.u32.
__SM_32_INTRINSICS_DECL__ void __stwb(unsigned int *ptr, unsigned int value)
{
    asm ("st.global.wb.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 396 |
+
// Writes value to ptr with PTX st.global.wb.u64.
__SM_32_INTRINSICS_DECL__ void __stwb(unsigned long long *ptr, unsigned long long value)
{
    asm ("st.global.wb.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 397 |
+
// Copies both lanes of value into ints, then writes them to ptr with PTX st.global.wb.v2.u8.
__SM_32_INTRINSICS_DECL__ void __stwb(uchar2 *ptr, uchar2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.wb.v2.u8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 398 |
+
// Copies all four lanes of value into ints, then writes them to ptr with PTX st.global.wb.v4.u8.
__SM_32_INTRINSICS_DECL__ void __stwb(uchar4 *ptr, uchar4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.wb.v4.u8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 399 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.u16.
__SM_32_INTRINSICS_DECL__ void __stwb(ushort2 *ptr, ushort2 value)
{
    asm ("st.global.wb.v2.u16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 400 |
+
// Writes all four lanes of value to ptr with PTX st.global.wb.v4.u16.
__SM_32_INTRINSICS_DECL__ void __stwb(ushort4 *ptr, ushort4 value)
{
    asm ("st.global.wb.v4.u16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 401 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.u32.
__SM_32_INTRINSICS_DECL__ void __stwb(uint2 *ptr, uint2 value)
{
    asm ("st.global.wb.v2.u32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 402 |
+
// Writes all four lanes of value to ptr with PTX st.global.wb.v4.u32.
__SM_32_INTRINSICS_DECL__ void __stwb(uint4 *ptr, uint4 value)
{
    asm ("st.global.wb.v4.u32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 403 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.u64.
__SM_32_INTRINSICS_DECL__ void __stwb(ulonglong2 *ptr, ulonglong2 value)
{
    asm ("st.global.wb.v2.u64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 404 |
+
|
| 405 |
+
// Writes value to ptr with PTX st.global.wb.f32.
__SM_32_INTRINSICS_DECL__ void __stwb(float *ptr, float value)
{
    asm ("st.global.wb.f32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value)
         : "memory");
}
|
| 406 |
+
// Writes value to ptr with PTX st.global.wb.f64.
__SM_32_INTRINSICS_DECL__ void __stwb(double *ptr, double value)
{
    asm ("st.global.wb.f64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value)
         : "memory");
}
|
| 407 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.f32.
__SM_32_INTRINSICS_DECL__ void __stwb(float2 *ptr, float2 value)
{
    asm ("st.global.wb.v2.f32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y)
         : "memory");
}
|
| 408 |
+
// Writes all four lanes of value to ptr with PTX st.global.wb.v4.f32.
__SM_32_INTRINSICS_DECL__ void __stwb(float4 *ptr, float4 value)
{
    asm ("st.global.wb.v4.f32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w)
         : "memory");
}
|
| 409 |
+
// Writes both lanes of value to ptr with PTX st.global.wb.v2.f64.
__SM_32_INTRINSICS_DECL__ void __stwb(double2 *ptr, double2 value)
{
    asm ("st.global.wb.v2.f64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value.x), "d"(value.y)
         : "memory");
}
|
| 410 |
+
|
| 411 |
+
/******************************************************************************
|
| 412 |
+
* __stcg *
|
| 413 |
+
******************************************************************************/
|
| 414 |
+
|
| 415 |
+
// Size of long is architecture and OS specific.
|
| 416 |
+
#if defined(__LP64__) // 64 bits
|
| 417 |
+
// __LP64__ build: writes value to ptr with PTX st.global.cg.s64.
__SM_32_INTRINSICS_DECL__ void __stcg(long *ptr, long value)
{
    asm ("st.global.cg.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 418 |
+
// __LP64__ build: writes value to ptr with PTX st.global.cg.u64.
__SM_32_INTRINSICS_DECL__ void __stcg(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.cg.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 419 |
+
#else // 32 bits
|
| 420 |
+
// Non-__LP64__ build: writes value to ptr with PTX st.global.cg.s32.
__SM_32_INTRINSICS_DECL__ void __stcg(long *ptr, long value)
{
    asm ("st.global.cg.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 421 |
+
// Non-__LP64__ build: writes value to ptr with PTX st.global.cg.u32.
__SM_32_INTRINSICS_DECL__ void __stcg(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.cg.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 422 |
+
#endif
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.cg.s8.
__SM_32_INTRINSICS_DECL__ void __stcg(char *ptr, char value)
{
    asm ("st.global.cg.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 426 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.cg.s8.
__SM_32_INTRINSICS_DECL__ void __stcg(signed char *ptr, signed char value)
{
    asm ("st.global.cg.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 427 |
+
// Writes value to ptr with PTX st.global.cg.s16.
__SM_32_INTRINSICS_DECL__ void __stcg(short *ptr, short value)
{
    asm ("st.global.cg.s16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 428 |
+
// Writes value to ptr with PTX st.global.cg.s32.
__SM_32_INTRINSICS_DECL__ void __stcg(int *ptr, int value)
{
    asm ("st.global.cg.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 429 |
+
// Writes value to ptr with PTX st.global.cg.s64.
__SM_32_INTRINSICS_DECL__ void __stcg(long long *ptr, long long value)
{
    asm ("st.global.cg.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 430 |
+
// Copies both lanes of value into ints, then writes them to ptr with PTX st.global.cg.v2.s8.
__SM_32_INTRINSICS_DECL__ void __stcg(char2 *ptr, char2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.cg.v2.s8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 431 |
+
// Copies all four lanes of value into ints, then writes them to ptr with PTX st.global.cg.v4.s8.
__SM_32_INTRINSICS_DECL__ void __stcg(char4 *ptr, char4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.cg.v4.s8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 432 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.s16.
__SM_32_INTRINSICS_DECL__ void __stcg(short2 *ptr, short2 value)
{
    asm ("st.global.cg.v2.s16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 433 |
+
// Writes all four lanes of value to ptr with PTX st.global.cg.v4.s16.
__SM_32_INTRINSICS_DECL__ void __stcg(short4 *ptr, short4 value)
{
    asm ("st.global.cg.v4.s16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 434 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.s32.
__SM_32_INTRINSICS_DECL__ void __stcg(int2 *ptr, int2 value)
{
    asm ("st.global.cg.v2.s32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 435 |
+
// Writes all four lanes of value to ptr with PTX st.global.cg.v4.s32.
__SM_32_INTRINSICS_DECL__ void __stcg(int4 *ptr, int4 value)
{
    asm ("st.global.cg.v4.s32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 436 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.s64.
__SM_32_INTRINSICS_DECL__ void __stcg(longlong2 *ptr, longlong2 value)
{
    asm ("st.global.cg.v2.s64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 437 |
+
|
| 438 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.cg.u8.
__SM_32_INTRINSICS_DECL__ void __stcg(unsigned char *ptr, unsigned char value)
{
    asm ("st.global.cg.u8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 439 |
+
// Writes value to ptr with PTX st.global.cg.u16.
__SM_32_INTRINSICS_DECL__ void __stcg(unsigned short *ptr, unsigned short value)
{
    asm ("st.global.cg.u16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 440 |
+
// Writes value to ptr with PTX st.global.cg.u32.
__SM_32_INTRINSICS_DECL__ void __stcg(unsigned int *ptr, unsigned int value)
{
    asm ("st.global.cg.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 441 |
+
// Writes value to ptr with PTX st.global.cg.u64.
__SM_32_INTRINSICS_DECL__ void __stcg(unsigned long long *ptr, unsigned long long value)
{
    asm ("st.global.cg.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 442 |
+
// Copies both lanes of value into ints, then writes them to ptr with PTX st.global.cg.v2.u8.
__SM_32_INTRINSICS_DECL__ void __stcg(uchar2 *ptr, uchar2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.cg.v2.u8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 443 |
+
// Copies all four lanes of value into ints, then writes them to ptr with PTX st.global.cg.v4.u8.
__SM_32_INTRINSICS_DECL__ void __stcg(uchar4 *ptr, uchar4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.cg.v4.u8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 444 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.u16.
__SM_32_INTRINSICS_DECL__ void __stcg(ushort2 *ptr, ushort2 value)
{
    asm ("st.global.cg.v2.u16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 445 |
+
// Writes all four lanes of value to ptr with PTX st.global.cg.v4.u16.
__SM_32_INTRINSICS_DECL__ void __stcg(ushort4 *ptr, ushort4 value)
{
    asm ("st.global.cg.v4.u16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 446 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.u32.
__SM_32_INTRINSICS_DECL__ void __stcg(uint2 *ptr, uint2 value)
{
    asm ("st.global.cg.v2.u32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 447 |
+
// Writes all four lanes of value to ptr with PTX st.global.cg.v4.u32.
__SM_32_INTRINSICS_DECL__ void __stcg(uint4 *ptr, uint4 value)
{
    asm ("st.global.cg.v4.u32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 448 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.u64.
__SM_32_INTRINSICS_DECL__ void __stcg(ulonglong2 *ptr, ulonglong2 value)
{
    asm ("st.global.cg.v2.u64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 449 |
+
|
| 450 |
+
// Writes value to ptr with PTX st.global.cg.f32.
__SM_32_INTRINSICS_DECL__ void __stcg(float *ptr, float value)
{
    asm ("st.global.cg.f32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value)
         : "memory");
}
|
| 451 |
+
// Writes value to ptr with PTX st.global.cg.f64.
__SM_32_INTRINSICS_DECL__ void __stcg(double *ptr, double value)
{
    asm ("st.global.cg.f64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value)
         : "memory");
}
|
| 452 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.f32.
__SM_32_INTRINSICS_DECL__ void __stcg(float2 *ptr, float2 value)
{
    asm ("st.global.cg.v2.f32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y)
         : "memory");
}
|
| 453 |
+
// Writes all four lanes of value to ptr with PTX st.global.cg.v4.f32.
__SM_32_INTRINSICS_DECL__ void __stcg(float4 *ptr, float4 value)
{
    asm ("st.global.cg.v4.f32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w)
         : "memory");
}
|
| 454 |
+
// Writes both lanes of value to ptr with PTX st.global.cg.v2.f64.
__SM_32_INTRINSICS_DECL__ void __stcg(double2 *ptr, double2 value)
{
    asm ("st.global.cg.v2.f64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value.x), "d"(value.y)
         : "memory");
}
|
| 455 |
+
|
| 456 |
+
/******************************************************************************
|
| 457 |
+
* __stcs *
|
| 458 |
+
******************************************************************************/
|
| 459 |
+
|
| 460 |
+
// Size of long is architecture and OS specific.
|
| 461 |
+
#if defined(__LP64__) // 64 bits
|
| 462 |
+
// __LP64__ build: writes value to ptr with PTX st.global.cs.s64.
__SM_32_INTRINSICS_DECL__ void __stcs(long *ptr, long value)
{
    asm ("st.global.cs.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 463 |
+
// __LP64__ build: writes value to ptr with PTX st.global.cs.u64.
__SM_32_INTRINSICS_DECL__ void __stcs(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.cs.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 464 |
+
#else // 32 bits
|
| 465 |
+
// Non-__LP64__ build: writes value to ptr with PTX st.global.cs.s32.
__SM_32_INTRINSICS_DECL__ void __stcs(long *ptr, long value)
{
    asm ("st.global.cs.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 466 |
+
// Non-__LP64__ build: writes value to ptr with PTX st.global.cs.u32.
__SM_32_INTRINSICS_DECL__ void __stcs(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.cs.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 467 |
+
#endif
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.cs.s8.
__SM_32_INTRINSICS_DECL__ void __stcs(char *ptr, char value)
{
    asm ("st.global.cs.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 471 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.cs.s8.
__SM_32_INTRINSICS_DECL__ void __stcs(signed char *ptr, signed char value)
{
    asm ("st.global.cs.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 472 |
+
// Writes value to ptr with PTX st.global.cs.s16.
__SM_32_INTRINSICS_DECL__ void __stcs(short *ptr, short value)
{
    asm ("st.global.cs.s16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 473 |
+
// Writes value to ptr with PTX st.global.cs.s32.
__SM_32_INTRINSICS_DECL__ void __stcs(int *ptr, int value)
{
    asm ("st.global.cs.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 474 |
+
// Writes value to ptr with PTX st.global.cs.s64.
__SM_32_INTRINSICS_DECL__ void __stcs(long long *ptr, long long value)
{
    asm ("st.global.cs.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 475 |
+
// Copies both lanes of value into ints, then writes them to ptr with PTX st.global.cs.v2.s8.
__SM_32_INTRINSICS_DECL__ void __stcs(char2 *ptr, char2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.cs.v2.s8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 476 |
+
// Copies all four lanes of value into ints, then writes them to ptr with PTX st.global.cs.v4.s8.
__SM_32_INTRINSICS_DECL__ void __stcs(char4 *ptr, char4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.cs.v4.s8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 477 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.s16.
__SM_32_INTRINSICS_DECL__ void __stcs(short2 *ptr, short2 value)
{
    asm ("st.global.cs.v2.s16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 478 |
+
// Writes all four lanes of value to ptr with PTX st.global.cs.v4.s16.
__SM_32_INTRINSICS_DECL__ void __stcs(short4 *ptr, short4 value)
{
    asm ("st.global.cs.v4.s16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 479 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.s32.
__SM_32_INTRINSICS_DECL__ void __stcs(int2 *ptr, int2 value)
{
    asm ("st.global.cs.v2.s32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 480 |
+
// Writes all four lanes of value to ptr with PTX st.global.cs.v4.s32.
__SM_32_INTRINSICS_DECL__ void __stcs(int4 *ptr, int4 value)
{
    asm ("st.global.cs.v4.s32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 481 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.s64.
__SM_32_INTRINSICS_DECL__ void __stcs(longlong2 *ptr, longlong2 value)
{
    asm ("st.global.cs.v2.s64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 482 |
+
|
| 483 |
+
// Writes value (cast to int for the register operand) to ptr with PTX st.global.cs.u8.
__SM_32_INTRINSICS_DECL__ void __stcs(unsigned char *ptr, unsigned char value)
{
    asm ("st.global.cs.u8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 484 |
+
// Writes value to ptr with PTX st.global.cs.u16.
__SM_32_INTRINSICS_DECL__ void __stcs(unsigned short *ptr, unsigned short value)
{
    asm ("st.global.cs.u16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 485 |
+
// Writes value to ptr with PTX st.global.cs.u32.
__SM_32_INTRINSICS_DECL__ void __stcs(unsigned int *ptr, unsigned int value)
{
    asm ("st.global.cs.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 486 |
+
// Writes value to ptr with PTX st.global.cs.u64.
__SM_32_INTRINSICS_DECL__ void __stcs(unsigned long long *ptr, unsigned long long value)
{
    asm ("st.global.cs.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 487 |
+
// Copies both lanes of value into ints, then writes them to ptr with PTX st.global.cs.v2.u8.
__SM_32_INTRINSICS_DECL__ void __stcs(uchar2 *ptr, uchar2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.cs.v2.u8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 488 |
+
// Copies all four lanes of value into ints, then writes them to ptr with PTX st.global.cs.v4.u8.
__SM_32_INTRINSICS_DECL__ void __stcs(uchar4 *ptr, uchar4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.cs.v4.u8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 489 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.u16.
__SM_32_INTRINSICS_DECL__ void __stcs(ushort2 *ptr, ushort2 value)
{
    asm ("st.global.cs.v2.u16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 490 |
+
// Writes all four lanes of value to ptr with PTX st.global.cs.v4.u16.
__SM_32_INTRINSICS_DECL__ void __stcs(ushort4 *ptr, ushort4 value)
{
    asm ("st.global.cs.v4.u16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 491 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.u32.
__SM_32_INTRINSICS_DECL__ void __stcs(uint2 *ptr, uint2 value)
{
    asm ("st.global.cs.v2.u32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 492 |
+
// Writes all four lanes of value to ptr with PTX st.global.cs.v4.u32.
__SM_32_INTRINSICS_DECL__ void __stcs(uint4 *ptr, uint4 value)
{
    asm ("st.global.cs.v4.u32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 493 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.u64.
__SM_32_INTRINSICS_DECL__ void __stcs(ulonglong2 *ptr, ulonglong2 value)
{
    asm ("st.global.cs.v2.u64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 494 |
+
|
| 495 |
+
// Writes value to ptr with PTX st.global.cs.f32.
__SM_32_INTRINSICS_DECL__ void __stcs(float *ptr, float value)
{
    asm ("st.global.cs.f32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value)
         : "memory");
}
|
| 496 |
+
// Writes value to ptr with PTX st.global.cs.f64.
__SM_32_INTRINSICS_DECL__ void __stcs(double *ptr, double value)
{
    asm ("st.global.cs.f64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value)
         : "memory");
}
|
| 497 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.f32.
__SM_32_INTRINSICS_DECL__ void __stcs(float2 *ptr, float2 value)
{
    asm ("st.global.cs.v2.f32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y)
         : "memory");
}
|
| 498 |
+
// Writes all four lanes of value to ptr with PTX st.global.cs.v4.f32.
__SM_32_INTRINSICS_DECL__ void __stcs(float4 *ptr, float4 value)
{
    asm ("st.global.cs.v4.f32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w)
         : "memory");
}
|
| 499 |
+
// Writes both lanes of value to ptr with PTX st.global.cs.v2.f64.
__SM_32_INTRINSICS_DECL__ void __stcs(double2 *ptr, double2 value)
{
    asm ("st.global.cs.v2.f64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value.x), "d"(value.y)
         : "memory");
}
|
| 500 |
+
|
| 501 |
+
/******************************************************************************
|
| 502 |
+
* __stwt *
|
| 503 |
+
******************************************************************************/
|
| 504 |
+
|
| 505 |
+
// Size of long is architecture and OS specific.
|
| 506 |
+
#if defined(__LP64__) // 64 bits
|
| 507 |
+
// __stwt for long when __LP64__ is defined: long is stored as a 64-bit value (st.global.wt.s64).
__SM_32_INTRINSICS_DECL__ void __stwt(long *ptr, long value)
{
    asm ("st.global.wt.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 508 |
+
// __stwt for unsigned long when __LP64__ is defined: stored as a 64-bit value (st.global.wt.u64).
__SM_32_INTRINSICS_DECL__ void __stwt(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.wt.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 509 |
+
#else // 32 bits
|
| 510 |
+
// __stwt for long when __LP64__ is not defined: long is stored as a 32-bit value (st.global.wt.s32).
__SM_32_INTRINSICS_DECL__ void __stwt(long *ptr, long value)
{
    asm ("st.global.wt.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 511 |
+
// __stwt for unsigned long when __LP64__ is not defined: stored as a 32-bit value (st.global.wt.u32).
__SM_32_INTRINSICS_DECL__ void __stwt(unsigned long *ptr, unsigned long value)
{
    asm ("st.global.wt.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 512 |
+
#endif
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
// __stwt for char: st.global.wt.s8 store. The value is cast to int because the
// operand is passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(char *ptr, char value)
{
    asm ("st.global.wt.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 516 |
+
// __stwt for signed char: st.global.wt.s8 store. The value is cast to int because
// the operand is passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(signed char *ptr, signed char value)
{
    asm ("st.global.wt.s8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 517 |
+
// __stwt for short: a single st.global.wt.s16 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(short *ptr, short value)
{
    asm ("st.global.wt.s16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 518 |
+
// __stwt for int: a single st.global.wt.s32 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(int *ptr, int value)
{
    asm ("st.global.wt.s32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 519 |
+
// __stwt for long long: a single st.global.wt.s64 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(long long *ptr, long long value)
{
    asm ("st.global.wt.s64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 520 |
+
// __stwt for char2: st.global.wt.v2.s8 store. Each component is first copied into
// an int so it can be passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(char2 *ptr, char2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.wt.v2.s8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 521 |
+
// __stwt for char4: st.global.wt.v4.s8 store. Each component is first copied into
// an int so it can be passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(char4 *ptr, char4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.wt.v4.s8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 522 |
+
// __stwt for short2: a single st.global.wt.v2.s16 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(short2 *ptr, short2 value)
{
    asm ("st.global.wt.v2.s16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 523 |
+
// __stwt for short4: a single st.global.wt.v4.s16 store of all four components to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(short4 *ptr, short4 value)
{
    asm ("st.global.wt.v4.s16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 524 |
+
// __stwt for int2: a single st.global.wt.v2.s32 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(int2 *ptr, int2 value)
{
    asm ("st.global.wt.v2.s32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 525 |
+
// __stwt for int4: a single st.global.wt.v4.s32 store of all four components to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(int4 *ptr, int4 value)
{
    asm ("st.global.wt.v4.s32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 526 |
+
// __stwt for longlong2: a single st.global.wt.v2.s64 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(longlong2 *ptr, longlong2 value)
{
    asm ("st.global.wt.v2.s64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 527 |
+
|
| 528 |
+
// __stwt for unsigned char: st.global.wt.u8 store. The value is cast to int because
// the operand is passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(unsigned char *ptr, unsigned char value)
{
    asm ("st.global.wt.u8 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"((int)value)
         : "memory");
}
|
| 529 |
+
// __stwt for unsigned short: a single st.global.wt.u16 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(unsigned short *ptr, unsigned short value)
{
    asm ("st.global.wt.u16 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value)
         : "memory");
}
|
| 530 |
+
// __stwt for unsigned int: a single st.global.wt.u32 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(unsigned int *ptr, unsigned int value)
{
    asm ("st.global.wt.u32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value)
         : "memory");
}
|
| 531 |
+
// __stwt for unsigned long long: a single st.global.wt.u64 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(unsigned long long *ptr, unsigned long long value)
{
    asm ("st.global.wt.u64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value)
         : "memory");
}
|
| 532 |
+
// __stwt for uchar2: st.global.wt.v2.u8 store. Each component is first copied into
// an int so it can be passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(uchar2 *ptr, uchar2 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    asm ("st.global.wt.v2.u8 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y)
         : "memory");
}
|
| 533 |
+
// __stwt for uchar4: st.global.wt.v4.u8 store. Each component is first copied into
// an int so it can be passed through a 32-bit "r" register constraint.
__SM_32_INTRINSICS_DECL__ void __stwt(uchar4 *ptr, uchar4 value)
{
    const int lane_x = value.x;
    const int lane_y = value.y;
    const int lane_z = value.z;
    const int lane_w = value.w;
    asm ("st.global.wt.v4.u8 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(lane_x), "r"(lane_y), "r"(lane_z), "r"(lane_w)
         : "memory");
}
|
| 534 |
+
// __stwt for ushort2: a single st.global.wt.v2.u16 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(ushort2 *ptr, ushort2 value)
{
    asm ("st.global.wt.v2.u16 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y)
         : "memory");
}
|
| 535 |
+
// __stwt for ushort4: a single st.global.wt.v4.u16 store of all four components to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(ushort4 *ptr, ushort4 value)
{
    asm ("st.global.wt.v4.u16 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w)
         : "memory");
}
|
| 536 |
+
// __stwt for uint2: a single st.global.wt.v2.u32 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(uint2 *ptr, uint2 value)
{
    asm ("st.global.wt.v2.u32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y)
         : "memory");
}
|
| 537 |
+
// __stwt for uint4: a single st.global.wt.v4.u32 store of all four components to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(uint4 *ptr, uint4 value)
{
    asm ("st.global.wt.v4.u32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w)
         : "memory");
}
|
| 538 |
+
// __stwt for ulonglong2: a single st.global.wt.v2.u64 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(ulonglong2 *ptr, ulonglong2 value)
{
    asm ("st.global.wt.v2.u64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "l"(value.x), "l"(value.y)
         : "memory");
}
|
| 539 |
+
|
| 540 |
+
// __stwt for float: a single st.global.wt.f32 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(float *ptr, float value)
{
    asm ("st.global.wt.f32 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value)
         : "memory");
}
|
| 541 |
+
// __stwt for double: a single st.global.wt.f64 store of value to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(double *ptr, double value)
{
    asm ("st.global.wt.f64 [%0], %1;"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value)
         : "memory");
}
|
| 542 |
+
// __stwt for float2: a single st.global.wt.v2.f32 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(float2 *ptr, float2 value)
{
    asm ("st.global.wt.v2.f32 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y)
         : "memory");
}
|
| 543 |
+
// __stwt for float4: a single st.global.wt.v4.f32 store of all four components to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(float4 *ptr, float4 value)
{
    asm ("st.global.wt.v4.f32 [%0], {%1,%2,%3,%4};"
         : /* no outputs */
         : __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w)
         : "memory");
}
|
| 544 |
+
// __stwt for double2: a single st.global.wt.v2.f64 store of value.x and value.y to ptr.
__SM_32_INTRINSICS_DECL__ void __stwt(double2 *ptr, double2 value)
{
    asm ("st.global.wt.v2.f64 [%0], {%1,%2};"
         : /* no outputs */
         : __LDG_PTR (ptr), "d"(value.x), "d"(value.y)
         : "memory");
}
|
| 545 |
+
|
| 546 |
+
#undef __LDG_PTR
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
// SHF is the "funnel shift" operation - an accelerated left/right shift with carry
|
| 550 |
+
// operating on 64-bit quantities, which are concatenations of two 32-bit registers.
|
| 551 |
+
|
| 552 |
+
// This shifts the 64-bit value [hi:lo] left by "shift" bits, returning the most significant 32 bits of the result.
|
| 553 |
+
// Left funnel shift, wrap variant: issues shf.l.wrap.b32 with lo, hi and shift as
// its source operands (in that order) and returns the 32-bit destination register.
// See the PTX ISA "shf" entry for how .wrap treats the shift amount.
__SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift)
{
    unsigned int result;
    asm volatile ("shf.l.wrap.b32 %0, %1, %2, %3;"
                  : "=r"(result)
                  : "r"(lo), "r"(hi), "r"(shift));
    return result;
}
|
| 559 |
+
// Left funnel shift, clamp variant: issues shf.l.clamp.b32 with lo, hi and shift as
// its source operands (in that order) and returns the 32-bit destination register.
// See the PTX ISA "shf" entry for how .clamp treats the shift amount.
__SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift)
{
    unsigned int result;
    asm volatile ("shf.l.clamp.b32 %0, %1, %2, %3;"
                  : "=r"(result)
                  : "r"(lo), "r"(hi), "r"(shift));
    return result;
}
|
| 565 |
+
|
| 566 |
+
// This shifts the 64-bit value [hi:lo] right by "shift" bits, returning the least significant 32 bits of the result.
|
| 567 |
+
// Right funnel shift, wrap variant: issues shf.r.wrap.b32 with lo, hi and shift as
// its source operands (in that order) and returns the 32-bit destination register.
// See the PTX ISA "shf" entry for how .wrap treats the shift amount.
__SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift)
{
    unsigned int result;
    asm volatile ("shf.r.wrap.b32 %0, %1, %2, %3;"
                  : "=r"(result)
                  : "r"(lo), "r"(hi), "r"(shift));
    return result;
}
|
| 573 |
+
// Right funnel shift, clamp variant: issues shf.r.clamp.b32 with lo, hi and shift as
// its source operands (in that order) and returns the 32-bit destination register.
// See the PTX ISA "shf" entry for how .clamp treats the shift amount.
__SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift)
{
    unsigned int result;
    asm volatile ("shf.r.clamp.b32 %0, %1, %2, %3;"
                  : "=r"(result)
                  : "r"(lo), "r"(hi), "r"(shift));
    return result;
}
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
#endif /* _NVHPC_CUDA || !__CUDA_ARCH__ || __CUDA_ARCH__ >= 320 */
|
| 582 |
+
|
| 583 |
+
#endif /* __cplusplus && __CUDACC__ */
|
| 584 |
+
|
| 585 |
+
#undef __SM_32_INTRINSICS_DECL__
|
| 586 |
+
|
| 587 |
+
#endif /* !__SM_32_INTRINSICS_HPP__ */
|
| 588 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_60_atomic_functions.h
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
//NOTE: For NVRTC, these declarations have been moved into the compiler (to reduce compile time)
|
| 51 |
+
#define EXCLUDE_FROM_RTC
|
| 52 |
+
|
| 53 |
+
#if !defined(__SM_60_ATOMIC_FUNCTIONS_H__)
|
| 54 |
+
#define __SM_60_ATOMIC_FUNCTIONS_H__
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
#if defined(__CUDACC_RTC__)
|
| 58 |
+
#define __SM_60_ATOMIC_FUNCTIONS_DECL__ __device__
|
| 59 |
+
#elif defined(_NVHPC_CUDA)
|
| 60 |
+
#define __SM_60_ATOMIC_FUNCTIONS_DECL__ extern __device__ __cudart_builtin__
|
| 61 |
+
#else /* __CUDACC_RTC__ */
|
| 62 |
+
#define __SM_60_ATOMIC_FUNCTIONS_DECL__ static __inline__ __device__
|
| 63 |
+
#endif /* __CUDACC_RTC__ */
|
| 64 |
+
|
| 65 |
+
#if defined(__cplusplus) && defined(__CUDACC__)
|
| 66 |
+
|
| 67 |
+
#if defined(_NVHPC_CUDA) || !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
|
| 68 |
+
|
| 69 |
+
/*******************************************************************************
|
| 70 |
+
* *
|
| 71 |
+
* *
|
| 72 |
+
* *
|
| 73 |
+
*******************************************************************************/
|
| 74 |
+
|
| 75 |
+
#include "cuda_runtime_api.h"
|
| 76 |
+
|
| 77 |
+
/* Add !defined(_NVHPC_CUDA) to avoid empty function definition in CUDA
|
| 78 |
+
* C++ compiler where the macro __CUDA_ARCH__ is not defined. */
|
| 79 |
+
#if !defined(__CUDA_ARCH__) && !defined(_NVHPC_CUDA)
|
| 80 |
+
#define __DEF_IF_HOST { }
|
| 81 |
+
#else /* !__CUDA_ARCH__ */
|
| 82 |
+
#define __DEF_IF_HOST ;
|
| 83 |
+
#endif /* __CUDA_ARCH__ */
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
/*******************************************************************************
|
| 88 |
+
* *
|
| 89 |
+
* *
|
| 90 |
+
* *
|
| 91 |
+
*******************************************************************************/
|
| 92 |
+
|
| 93 |
+
/* atomicAdd overload taking a double. The trailing __DEF_IF_HOST supplies either an
 * empty `{ }` body or a terminating `;`, depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ double atomicAdd(double *address, double val) __DEF_IF_HOST
|
| 94 |
+
|
| 95 |
+
/* atomicAdd_block / atomicAdd_system overloads for int, unsigned int,
 * unsigned long long, float and double.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above.
 * NOTE(review): the _block/_system suffixes presumably select the scope of the
 * atomic operation; confirm against the CUDA C++ Programming Guide. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicAdd_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicAdd_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAdd_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAdd_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicAdd_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicAdd_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ float atomicAdd_block(float *address, float val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ float atomicAdd_system(float *address, float val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ double atomicAdd_block(double *address, double val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ double atomicAdd_system(double *address, double val) __DEF_IF_HOST
|
| 124 |
+
|
| 125 |
+
/* atomicSub_block / atomicSub_system overloads for int and unsigned int.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicSub_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicSub_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicSub_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicSub_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
|
| 136 |
+
|
| 137 |
+
/* atomicExch_block / atomicExch_system overloads for int, unsigned int,
 * unsigned long long and float.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicExch_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicExch_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicExch_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicExch_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicExch_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicExch_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ float atomicExch_block(float *address, float val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ float atomicExch_system(float *address, float val) __DEF_IF_HOST
|
| 160 |
+
|
| 161 |
+
/* atomicMin_block / atomicMin_system overloads for int, long long, unsigned int
 * and unsigned long long.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicMin_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicMin_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicMin_block(long long *address, long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicMin_system(long long *address, long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMin_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMin_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMin_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMin_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
|
| 184 |
+
|
| 185 |
+
/* atomicMax_block / atomicMax_system overloads for int, long long, unsigned int
 * and unsigned long long.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicMax_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicMax_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicMax_block(long long *address, long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicMax_system(long long *address, long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMax_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMax_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMax_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMax_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
|
| 208 |
+
|
| 209 |
+
/* atomicInc and atomicDec, _block / _system variants; both exist only for unsigned int.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicInc_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicInc_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicDec_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicDec_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
|
| 220 |
+
|
| 221 |
+
/* atomicCAS_block / atomicCAS_system overloads for int, unsigned int and
 * unsigned long long int; each takes (address, compare, val).
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicCAS_block(int *address, int compare, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicCAS_system(int *address, int compare, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicCAS_block(unsigned int *address, unsigned int compare, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicCAS_system(unsigned int *address, unsigned int compare, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicCAS_block(unsigned long long int *address, unsigned long long int compare, unsigned long long int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicCAS_system(unsigned long long int *address, unsigned long long int compare, unsigned long long int val) __DEF_IF_HOST
|
| 244 |
+
|
| 245 |
+
/* atomicAnd_block / atomicAnd_system overloads for int, long long, unsigned int
 * and unsigned long long.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicAnd_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicAnd_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicAnd_block(long long *address, long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicAnd_system(long long *address, long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAnd_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAnd_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicAnd_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicAnd_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
|
| 268 |
+
|
| 269 |
+
/* atomicOr_block / atomicOr_system overloads for int, long long, unsigned int
 * and unsigned long long.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicOr_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicOr_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicOr_block(long long *address, long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicOr_system(long long *address, long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicOr_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicOr_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicOr_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicOr_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
|
| 292 |
+
|
| 293 |
+
/* atomicXor_block / atomicXor_system overloads for int, long long, unsigned int
 * and unsigned long long.
 * Each line ends in __DEF_IF_HOST, which expands to an empty `{ }` body or to `;`
 * depending on the #if that defines it above. */
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicXor_block(int *address, int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ int atomicXor_system(int *address, int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicXor_block(long long *address, long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ long long atomicXor_system(long long *address, long long val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicXor_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicXor_system(unsigned int *address, unsigned int val) __DEF_IF_HOST

__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicXor_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
__SM_60_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicXor_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
|
| 316 |
+
|
| 317 |
+
#endif /* _NVHPC_CUDA || !__CUDA_ARCH__ || __CUDA_ARCH__ >= 600 */
|
| 318 |
+
|
| 319 |
+
#endif /* __cplusplus && __CUDACC__ */
|
| 320 |
+
|
| 321 |
+
#undef __SM_60_ATOMIC_FUNCTIONS_DECL__
|
| 322 |
+
#undef __DEF_IF_HOST
|
| 323 |
+
|
| 324 |
+
#if !defined(__CUDACC_RTC__) && defined(__CUDA_ARCH__)
|
| 325 |
+
#include "sm_60_atomic_functions.hpp"
|
| 326 |
+
#endif /* !__CUDACC_RTC__ && defined(__CUDA_ARCH__) */
|
| 327 |
+
|
| 328 |
+
#endif /* !__SM_60_ATOMIC_FUNCTIONS_H__ */
|
| 329 |
+
|
| 330 |
+
#undef EXCLUDE_FROM_RTC
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_61_intrinsics.hpp
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2016 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(__SM_61_INTRINSICS_HPP__)
|
| 51 |
+
#define __SM_61_INTRINSICS_HPP__
|
| 52 |
+
|
| 53 |
+
#if defined(__CUDACC_RTC__)
|
| 54 |
+
#define __SM_61_INTRINSICS_DECL__ __device__
|
| 55 |
+
#else /* !__CUDACC_RTC__ */
|
| 56 |
+
#define __SM_61_INTRINSICS_DECL__ static __device__ __inline__
|
| 57 |
+
#endif /* __CUDACC_RTC__ */
|
| 58 |
+
|
| 59 |
+
#if defined(__cplusplus) && defined(__CUDACC__)
|
| 60 |
+
|
| 61 |
+
#if defined(_NVHPC_CUDA) || !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 610
|
| 62 |
+
|
| 63 |
+
/*******************************************************************************
|
| 64 |
+
* *
|
| 65 |
+
* *
|
| 66 |
+
* *
|
| 67 |
+
*******************************************************************************/
|
| 68 |
+
|
| 69 |
+
#include "cuda_runtime_api.h"
|
| 70 |
+
|
| 71 |
+
/*******************************************************************************
|
| 72 |
+
* *
|
| 73 |
+
* Below are implementations of SM-6.1 intrinsics which are included as *
|
| 74 |
+
* source (instead of being built in to the compiler) *
|
| 75 |
+
* *
|
| 76 |
+
*******************************************************************************/
|
| 77 |
+
|
| 78 |
+
// __dp4a overloads: wrappers around the PTX dp4a instruction
|
| 79 |
+
// Signed __dp4a on int operands: issues dp4a.s32.s32 with srcA, srcB and c as its
// source operands and returns the 32-bit destination register.
__SM_61_INTRINSICS_DECL__ int __dp4a(int srcA, int srcB, int c)
{
    int result;
    asm volatile ("dp4a.s32.s32 %0, %1, %2, %3;"
                  : "=r"(result)
                  : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}
|
| 84 |
+
|
| 85 |
+
// Unsigned dp4a: issues the PTX instruction `dp4a.u32.u32` with srcA, srcB and
// c as 32-bit register inputs and returns the 32-bit result.
// Per the PTX ISA, the four packed bytes of each source are treated as
// unsigned, multiplied pairwise, and the sum of the products is added to c.
__SM_61_INTRINSICS_DECL__ unsigned int __dp4a(unsigned int srcA, unsigned int srcB, unsigned int c) {
    unsigned int ret;
    asm volatile ("dp4a.u32.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(srcA), "r"(srcB), "r"(c));
    return ret;
}
|
| 90 |
+
|
| 91 |
+
// Signed dp4a taking the packed bytes as char4 vectors.
// Each char4 argument is reinterpreted in place as one 32-bit int (pointer
// cast on the by-value copy) and fed to `dp4a.s32.s32` together with c.
__SM_61_INTRINSICS_DECL__ int __dp4a(char4 srcA, char4 srcB, int c) {
    int ret;
    asm volatile ("dp4a.s32.s32 %0, %1, %2, %3;" : "=r"(ret) : "r"(*(int *)&srcA), "r"(*(int *)&srcB), "r"(c));
    return ret;
}
|
| 96 |
+
|
| 97 |
+
// Unsigned dp4a taking the packed bytes as uchar4 vectors.
// Each uchar4 argument is reinterpreted in place as one 32-bit unsigned int
// (pointer cast on the by-value copy) and fed to `dp4a.u32.u32` together with c.
__SM_61_INTRINSICS_DECL__ unsigned int __dp4a(uchar4 srcA, uchar4 srcB, unsigned int c) {
    unsigned int ret;
    asm volatile ("dp4a.u32.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(*(unsigned int *)&srcA), "r"(*(unsigned int *)&srcB), "r"(c));
    return ret;
}
|
| 102 |
+
|
| 103 |
+
// 2a.lo
|
| 104 |
+
// Signed dp2a, low half: issues the PTX instruction `dp2a.lo.s32.s32` with
// srcA, srcB and c as 32-bit register inputs and returns the 32-bit result.
// Per the PTX ISA, srcA holds two packed 16-bit values and srcB four packed
// bytes; `.lo` selects the lower two bytes of srcB for the two-way dot
// product, which is accumulated into c.
__SM_61_INTRINSICS_DECL__ int __dp2a_lo(int srcA, int srcB, int c) {
    int ret;
    asm volatile ("dp2a.lo.s32.s32 %0, %1, %2, %3;" : "=r"(ret) : "r"(srcA), "r"(srcB), "r"(c));
    return ret;
}
|
| 109 |
+
|
| 110 |
+
// Unsigned dp2a, low half: issues the PTX instruction `dp2a.lo.u32.u32` with
// srcA, srcB and c as 32-bit register inputs and returns the 32-bit result.
// Per the PTX ISA, the two 16-bit halves of srcA are combined with the lower
// two bytes of srcB and the dot product is accumulated into c.
__SM_61_INTRINSICS_DECL__ unsigned int __dp2a_lo(unsigned int srcA, unsigned int srcB, unsigned int c) {
    unsigned int ret;
    asm volatile ("dp2a.lo.u32.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(srcA), "r"(srcB), "r"(c));
    return ret;
}
|
| 115 |
+
|
| 116 |
+
// Signed dp2a, low half, taking the operands as vector types.
// The short2 and char4 arguments are each reinterpreted in place as one
// 32-bit int (pointer cast on the by-value copy) and fed to `dp2a.lo.s32.s32`
// together with c.
__SM_61_INTRINSICS_DECL__ int __dp2a_lo(short2 srcA, char4 srcB, int c) {
    int ret;
    asm volatile ("dp2a.lo.s32.s32 %0, %1, %2, %3;" : "=r"(ret) : "r"(*(int *)&srcA), "r"(*(int *)&srcB), "r"(c));
    return ret;
}
|
| 121 |
+
|
| 122 |
+
// Unsigned dp2a, low half, taking the operands as vector types.
// The ushort2 and uchar4 arguments are each reinterpreted in place as one
// 32-bit unsigned int (pointer cast on the by-value copy) and fed to
// `dp2a.lo.u32.u32` together with c.
__SM_61_INTRINSICS_DECL__ unsigned int __dp2a_lo(ushort2 srcA, uchar4 srcB, unsigned int c) {
    unsigned int ret;
    asm volatile ("dp2a.lo.u32.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(*(unsigned int *)&srcA), "r"(*(unsigned int *)&srcB), "r"(c));
    return ret;
}
|
| 127 |
+
|
| 128 |
+
// 2a.hi
|
| 129 |
+
// Signed dp2a, high half: issues the PTX instruction `dp2a.hi.s32.s32` with
// srcA, srcB and c as 32-bit register inputs and returns the 32-bit result.
// Per the PTX ISA, srcA holds two packed 16-bit values and srcB four packed
// bytes; `.hi` selects the upper two bytes of srcB for the two-way dot
// product, which is accumulated into c.
__SM_61_INTRINSICS_DECL__ int __dp2a_hi(int srcA, int srcB, int c) {
    int ret;
    asm volatile ("dp2a.hi.s32.s32 %0, %1, %2, %3;" : "=r"(ret) : "r"(srcA), "r"(srcB), "r"(c));
    return ret;
}
|
| 134 |
+
|
| 135 |
+
// Unsigned dp2a, high half: issues the PTX instruction `dp2a.hi.u32.u32` with
// srcA, srcB and c as 32-bit register inputs and returns the 32-bit result.
// Per the PTX ISA, the two 16-bit halves of srcA are combined with the upper
// two bytes of srcB and the dot product is accumulated into c.
__SM_61_INTRINSICS_DECL__ unsigned int __dp2a_hi(unsigned int srcA, unsigned int srcB, unsigned int c) {
    unsigned int ret;
    asm volatile ("dp2a.hi.u32.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(srcA), "r"(srcB), "r"(c));
    return ret;
}
|
| 140 |
+
|
| 141 |
+
// Signed dp2a, high half, taking the operands as vector types.
// The short2 and char4 arguments are each reinterpreted in place as one
// 32-bit int (pointer cast on the by-value copy) and fed to `dp2a.hi.s32.s32`
// together with c.
__SM_61_INTRINSICS_DECL__ int __dp2a_hi(short2 srcA, char4 srcB, int c) {
    int ret;
    asm volatile ("dp2a.hi.s32.s32 %0, %1, %2, %3;" : "=r"(ret) : "r"(*(int *)&srcA), "r"(*(int *)&srcB), "r"(c));
    return ret;
}
|
| 146 |
+
|
| 147 |
+
// Unsigned dp2a, high half, taking the operands as vector types.
// The ushort2 and uchar4 arguments are each reinterpreted in place as one
// 32-bit unsigned int (pointer cast on the by-value copy) and fed to
// `dp2a.hi.u32.u32` together with c.
__SM_61_INTRINSICS_DECL__ unsigned int __dp2a_hi(ushort2 srcA, uchar4 srcB, unsigned int c) {
    unsigned int ret;
    asm volatile ("dp2a.hi.u32.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(*(unsigned int *)&srcA), "r"(*(unsigned int *)&srcB), "r"(c));
    return ret;
}
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
#endif /* _NVHPC_CUDA || !__CUDA_ARCH__ || __CUDA_ARCH__ >= 610 */
|
| 155 |
+
|
| 156 |
+
#endif /* __cplusplus && __CUDACC__ */
|
| 157 |
+
|
| 158 |
+
#undef __SM_61_INTRINSICS_DECL__
|
| 159 |
+
|
| 160 |
+
#endif /* !__SM_61_INTRINSICS_HPP__ */
|
| 161 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (6.96 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/convert.cpython-312.pyc
ADDED
|
Binary file (16.2 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/pack.cpython-312.pyc
ADDED
|
Binary file (4.49 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/tags.cpython-312.pyc
ADDED
|
Binary file (6.78 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/cli/__pycache__/unpack.cpython-312.pyc
ADDED
|
Binary file (1.56 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (220 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/LICENSE
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This software is made available under the terms of *either* of the licenses
|
| 2 |
+
found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
|
| 3 |
+
under the terms of *both* these licenses.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/LICENSE.APACHE
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Apache License
|
| 3 |
+
Version 2.0, January 2004
|
| 4 |
+
http://www.apache.org/licenses/
|
| 5 |
+
|
| 6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 7 |
+
|
| 8 |
+
1. Definitions.
|
| 9 |
+
|
| 10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 12 |
+
|
| 13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 14 |
+
the copyright owner that is granting the License.
|
| 15 |
+
|
| 16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 17 |
+
other entities that control, are controlled by, or are under common
|
| 18 |
+
control with that entity. For the purposes of this definition,
|
| 19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 20 |
+
direction or management of such entity, whether by contract or
|
| 21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 23 |
+
|
| 24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 25 |
+
exercising permissions granted by this License.
|
| 26 |
+
|
| 27 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 28 |
+
including but not limited to software source code, documentation
|
| 29 |
+
source, and configuration files.
|
| 30 |
+
|
| 31 |
+
"Object" form shall mean any form resulting from mechanical
|
| 32 |
+
transformation or translation of a Source form, including but
|
| 33 |
+
not limited to compiled object code, generated documentation,
|
| 34 |
+
and conversions to other media types.
|
| 35 |
+
|
| 36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 37 |
+
Object form, made available under the License, as indicated by a
|
| 38 |
+
copyright notice that is included in or attached to the work
|
| 39 |
+
(an example is provided in the Appendix below).
|
| 40 |
+
|
| 41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 42 |
+
form, that is based on (or derived from) the Work and for which the
|
| 43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 45 |
+
of this License, Derivative Works shall not include works that remain
|
| 46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 47 |
+
the Work and Derivative Works thereof.
|
| 48 |
+
|
| 49 |
+
"Contribution" shall mean any work of authorship, including
|
| 50 |
+
the original version of the Work and any modifications or additions
|
| 51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 55 |
+
means any form of electronic, verbal, or written communication sent
|
| 56 |
+
to the Licensor or its representatives, including but not limited to
|
| 57 |
+
communication on electronic mailing lists, source code control systems,
|
| 58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 60 |
+
excluding communication that is conspicuously marked or otherwise
|
| 61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 62 |
+
|
| 63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 65 |
+
subsequently incorporated within the Work.
|
| 66 |
+
|
| 67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 72 |
+
Work and such Derivative Works in Source or Object form.
|
| 73 |
+
|
| 74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 77 |
+
(except as stated in this section) patent license to make, have made,
|
| 78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 79 |
+
where such license applies only to those patent claims licensable
|
| 80 |
+
by such Contributor that are necessarily infringed by their
|
| 81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 83 |
+
institute patent litigation against any entity (including a
|
| 84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 85 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 86 |
+
or contributory patent infringement, then any patent licenses
|
| 87 |
+
granted to You under this License for that Work shall terminate
|
| 88 |
+
as of the date such litigation is filed.
|
| 89 |
+
|
| 90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 91 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 92 |
+
modifications, and in Source or Object form, provided that You
|
| 93 |
+
meet the following conditions:
|
| 94 |
+
|
| 95 |
+
(a) You must give any other recipients of the Work or
|
| 96 |
+
Derivative Works a copy of this License; and
|
| 97 |
+
|
| 98 |
+
(b) You must cause any modified files to carry prominent notices
|
| 99 |
+
stating that You changed the files; and
|
| 100 |
+
|
| 101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 102 |
+
that You distribute, all copyright, patent, trademark, and
|
| 103 |
+
attribution notices from the Source form of the Work,
|
| 104 |
+
excluding those notices that do not pertain to any part of
|
| 105 |
+
the Derivative Works; and
|
| 106 |
+
|
| 107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 108 |
+
distribution, then any Derivative Works that You distribute must
|
| 109 |
+
include a readable copy of the attribution notices contained
|
| 110 |
+
within such NOTICE file, excluding those notices that do not
|
| 111 |
+
pertain to any part of the Derivative Works, in at least one
|
| 112 |
+
of the following places: within a NOTICE text file distributed
|
| 113 |
+
as part of the Derivative Works; within the Source form or
|
| 114 |
+
documentation, if provided along with the Derivative Works; or,
|
| 115 |
+
within a display generated by the Derivative Works, if and
|
| 116 |
+
wherever such third-party notices normally appear. The contents
|
| 117 |
+
of the NOTICE file are for informational purposes only and
|
| 118 |
+
do not modify the License. You may add Your own attribution
|
| 119 |
+
notices within Derivative Works that You distribute, alongside
|
| 120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 121 |
+
that such additional attribution notices cannot be construed
|
| 122 |
+
as modifying the License.
|
| 123 |
+
|
| 124 |
+
You may add Your own copyright statement to Your modifications and
|
| 125 |
+
may provide additional or different license terms and conditions
|
| 126 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 127 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 128 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 129 |
+
the conditions stated in this License.
|
| 130 |
+
|
| 131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 133 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 134 |
+
this License, without any additional terms or conditions.
|
| 135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 136 |
+
the terms of any separate license agreement you may have executed
|
| 137 |
+
with Licensor regarding such Contributions.
|
| 138 |
+
|
| 139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 141 |
+
except as required for reasonable and customary use in describing the
|
| 142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 143 |
+
|
| 144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 145 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 148 |
+
implied, including, without limitation, any warranties or conditions
|
| 149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 151 |
+
appropriateness of using or redistributing the Work and assume any
|
| 152 |
+
risks associated with Your exercise of permissions under this License.
|
| 153 |
+
|
| 154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 155 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 156 |
+
unless required by applicable law (such as deliberate and grossly
|
| 157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 158 |
+
liable to You for damages, including any direct, indirect, special,
|
| 159 |
+
incidental, or consequential damages of any character arising as a
|
| 160 |
+
result of this License or out of the use or inability to use the
|
| 161 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 162 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 163 |
+
other commercial damages or losses), even if such Contributor
|
| 164 |
+
has been advised of the possibility of such damages.
|
| 165 |
+
|
| 166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 169 |
+
or other liability obligations and/or rights consistent with this
|
| 170 |
+
License. However, in accepting such obligations, You may act only
|
| 171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 172 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 173 |
+
defend, and hold each Contributor harmless for any liability
|
| 174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 175 |
+
of your accepting any such warranty or additional liability.
|
| 176 |
+
|
| 177 |
+
END OF TERMS AND CONDITIONS
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/LICENSE.BSD
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright (c) Donald Stufft and individual contributors.
|
| 2 |
+
All rights reserved.
|
| 3 |
+
|
| 4 |
+
Redistribution and use in source and binary forms, with or without
|
| 5 |
+
modification, are permitted provided that the following conditions are met:
|
| 6 |
+
|
| 7 |
+
1. Redistributions of source code must retain the above copyright notice,
|
| 8 |
+
this list of conditions and the following disclaimer.
|
| 9 |
+
|
| 10 |
+
2. Redistributions in binary form must reproduce the above copyright
|
| 11 |
+
notice, this list of conditions and the following disclaimer in the
|
| 12 |
+
documentation and/or other materials provided with the distribution.
|
| 13 |
+
|
| 14 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
| 15 |
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 16 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 17 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
| 18 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
| 19 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 20 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
| 21 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
| 22 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 23 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__init__.py
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (230 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_elffile.cpython-312.pyc
ADDED
|
Binary file (5.06 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_manylinux.cpython-312.pyc
ADDED
|
Binary file (9.93 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_musllinux.cpython-312.pyc
ADDED
|
Binary file (4.61 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_parser.cpython-312.pyc
ADDED
|
Binary file (14.1 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_structures.cpython-312.pyc
ADDED
|
Binary file (3.28 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/_tokenizer.cpython-312.pyc
ADDED
|
Binary file (7.97 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/markers.cpython-312.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/requirements.cpython-312.pyc
ADDED
|
Binary file (4.49 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/specifiers.cpython-312.pyc
ADDED
|
Binary file (39.6 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/tags.cpython-312.pyc
ADDED
|
Binary file (21.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/utils.cpython-312.pyc
ADDED
|
Binary file (7.32 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/__pycache__/version.cpython-312.pyc
ADDED
|
Binary file (20 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_elffile.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ELF file parser.
|
| 3 |
+
|
| 4 |
+
This provides a class ``ELFFile`` that parses an ELF executable in a similar
|
| 5 |
+
interface to ``ZipFile``. Only the read interface is implemented.
|
| 6 |
+
|
| 7 |
+
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
|
| 8 |
+
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import enum
|
| 12 |
+
import os
|
| 13 |
+
import struct
|
| 14 |
+
from typing import IO, Optional, Tuple
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class ELFInvalid(ValueError):
    """Signal that a file could not be parsed as an ELF executable."""
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class EIClass(enum.IntEnum):
    """Values of the ELF identification byte that encodes bitness."""

    C32 = 1  # 32-bit object.
    C64 = 2  # 64-bit object.
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class EIData(enum.IntEnum):
    """Values of the ELF identification byte that encodes endianness."""

    Lsb = 1  # Least-significant byte first (little-endian).
    Msb = 2  # Most-significant byte first (big-endian).
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class EMachine(enum.IntEnum):
    """Architecture codes as found in the ELF header's machine field."""

    I386 = 3
    S390 = 22
    Arm = 40
    X8664 = 62
    # NOTE(review): 183 is the AArch64 machine code in the ELF specification;
    # the member name is spelled "AArc64" and is kept as-is for callers.
    AArc64 = 183
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class ELFFile:
    """
    Representation of an ELF executable.

    The constructor reads the 16 identification bytes and the fields of the
    ELF header that follow them from ``f``. The stream is kept and read again
    lazily by :attr:`interpreter`, so it must stay open and seekable for as
    long as that property is used.

    Public attributes set by the constructor:

    * ``capacity`` -- identification byte 4 (1 = 32-bit, 2 = 64-bit).
    * ``encoding`` -- identification byte 5 (1 = LSB first, 2 = MSB first).
    * ``machine`` -- architecture type from the header.
    * ``flags`` -- processor-specific flags from the header.

    :raises ELFInvalid: if the identification bytes or the header cannot be
        read, the magic number is wrong, or the capacity/encoding pair is not
        recognized. The underlying error is attached as ``__cause__``.
    """

    def __init__(self, f: IO[bytes]) -> None:
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            # Chain explicitly, like the other parse failures below.
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # Format for program header (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format for the ELF header fields after the identification.
            # p_fmt: Format for one program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of program header table.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> Tuple[int, ...]:
        """Read and unpack ``fmt`` from the current stream position.

        A short read makes ``struct.unpack`` raise ``struct.error``.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> Optional[str]:
        """
        The path recorded in the ``PT_INTERP`` program header.

        Returns ``None`` when no program header entry of that type is found.
        Entries that cannot be read in full are skipped.
        """
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                continue
            if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
                continue
            # p_offset / p_filesz locate the NUL-terminated interpreter path.
            self._f.seek(data[self._p_idx[1]])
            return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
        return None
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_musllinux.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""PEP 656 support.
|
| 2 |
+
|
| 3 |
+
This module implements logic to detect if the currently running Python is
|
| 4 |
+
linked against musl, and what musl version is used.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import functools
|
| 8 |
+
import re
|
| 9 |
+
import subprocess
|
| 10 |
+
import sys
|
| 11 |
+
from typing import Iterator, NamedTuple, Optional, Sequence
|
| 12 |
+
|
| 13 |
+
from ._elffile import ELFFile
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class _MuslVersion(NamedTuple):
    """Major/minor pair of a musl version number."""

    major: int  # First component of the "Version X.Y" line.
    minor: int  # Second component of the "Version X.Y" line.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _parse_musl_version(output: str) -> Optional[_MuslVersion]:
    """Extract the musl version from the loader's banner text.

    Blank lines are ignored. The first remaining line must start with
    ``musl`` and the second must start with ``Version <major>.<minor>``;
    otherwise ``None`` is returned.
    """
    stripped = (raw.strip() for raw in output.splitlines())
    non_blank = [text for text in stripped if text]
    if len(non_blank) < 2:
        return None
    banner, version_line = non_blank[0], non_blank[1]
    if banner[:4] != "musl":
        return None
    found = re.match(r"Version (\d+)\.(\d+)", version_line)
    if found is None:
        return None
    major, minor = found.groups()
    return _MuslVersion(major=int(major), minor=int(minor))
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@functools.lru_cache
def _get_musl_version(executable: str) -> Optional[_MuslVersion]:
    """Detect currently-running musl runtime version.

    This is done by checking the specified executable's dynamic linking
    information, and invoking the loader to parse its output for a version
    string. If the loader is musl, the output would be something like::

        musl libc (x86_64)
        Version 1.2.2
        Dynamic Program Loader

    Returns ``None`` when the executable cannot be read or parsed, when its
    interpreter is missing or does not mention ``musl``, when the loader
    cannot be started, or when the loader's output is not recognized.
    Results are cached per ``executable`` path.
    """
    try:
        with open(executable, "rb") as f:
            ld = ELFFile(f).interpreter
    except (OSError, TypeError, ValueError):
        return None
    if ld is None or "musl" not in ld:
        return None
    try:
        proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
    except OSError:
        # The loader path recorded in the binary may not exist or may not be
        # executable on this system; treat that like any other detection
        # failure instead of letting it escape.
        return None
    return _parse_musl_version(proc.stderr)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
    """Yield the musllinux tags the running interpreter is compatible with.

    :param archs: Sequence of compatible architectures.
        The first one shall be the closest to the actual architecture and be
        the part of platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
        The ``linux_`` prefix is assumed as a prerequisite for the current
        platform to be musllinux-compatible.

    :returns: An iterator of compatible musllinux tags; for every
        architecture the minor version counts down from the detected one to
        zero. Nothing is yielded when Python is not dynamically linked
        against musl.
    """
    musl = _get_musl_version(sys.executable)
    if musl is not None:
        for arch in archs:
            yield from (
                f"musllinux_{musl.major}_{minor}_{arch}"
                for minor in reversed(range(musl.minor + 1))
            )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
if __name__ == "__main__":  # pragma: no cover
    # Debug entry point: print the platform, the detected musl version and
    # the musllinux tags generated for the current architecture.
    import sysconfig

    plat = sysconfig.get_platform()
    assert plat.startswith("linux-"), "not linux"

    print("plat:", plat)
    print("musl:", _get_musl_version(sys.executable))
    print("tags:", end=" ")
    # platform_tags() takes a sequence of architecture names. Passing the
    # bare string would iterate it character by character and emit one bogus
    # tag per character, so wrap the single architecture in a list.
    arch = re.sub(r"[.-]", "_", plat.split("-", 1)[-1])
    for t in platform_tags([arch]):
        print(t, end="\n ")
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_parser.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Handwritten parser of dependency specifiers.
|
| 2 |
+
|
| 3 |
+
The docstring for each __parse_* function contains EBNF-inspired grammar representing
|
| 4 |
+
the implementation.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import ast
|
| 8 |
+
from typing import Any, List, NamedTuple, Optional, Tuple, Union
|
| 9 |
+
|
| 10 |
+
from ._tokenizer import DEFAULT_RULES, Tokenizer
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Node:
    """Base class for the leaf nodes produced by the marker parser.

    A node wraps a single string ``value``. Subclasses decide how that value
    is rendered back into marker syntax by implementing :meth:`serialize`.
    """

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        # Rendered as <ClassName('value')>, using the concrete subclass name.
        class_name = type(self).__name__
        return "<{}('{}')>".format(class_name, self)

    def serialize(self) -> str:
        """Return the marker-syntax text for this node; subclasses override."""
        raise NotImplementedError
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class Variable(Node):
    """Marker node naming an environment marker variable."""

    def serialize(self) -> str:
        # Variables are written out bare, without any quoting.
        return f"{self}"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Value(Node):
    """Marker node holding a literal string value."""

    def serialize(self) -> str:
        # Values are always wrapped in double quotes; the text is not escaped.
        return '"' + str(self) + '"'
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class Op(Node):
    """Marker node holding a comparison operator."""

    def serialize(self) -> str:
        # Operators are written out exactly as stored.
        return f"{self}"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
MarkerVar = Union[Variable, Value]
|
| 43 |
+
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
|
| 44 |
+
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
|
| 45 |
+
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
|
| 46 |
+
# mypy does not support recursive type definition
|
| 47 |
+
# https://github.com/python/mypy/issues/731
|
| 48 |
+
MarkerAtom = Any
|
| 49 |
+
MarkerList = List[Any]
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class ParsedRequirement(NamedTuple):
    """Raw pieces of a parsed dependency specifier."""

    # Text of the leading IDENTIFIER token (the package name).
    name: str
    # URL given after "@"; empty string when the requirement has no URL.
    url: str
    # Extra names listed inside "[...]"; empty list when there are none.
    extras: List[str]
    # Concatenated version specifier text; empty string when absent.
    specifier: str
    # Parsed marker expression following ";", or None when there is no marker.
    marker: Optional[MarkerList]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# --------------------------------------------------------------------------------------
|
| 61 |
+
# Recursive descent parser for dependency specifier
|
| 62 |
+
# --------------------------------------------------------------------------------------
|
| 63 |
+
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse the dependency specifier ``source`` into a ParsedRequirement.

    A syntax error is reported by the tokenizer's ``raise_syntax_error``.
    """
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details

    The whole input must be consumed: anything left after the details is
    reported as a syntax error.
    """
    tokenizer.consume("WS")
    name = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    ).text

    tokenizer.consume("WS")
    extras = _parse_extras(tokenizer)

    tokenizer.consume("WS")
    details = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    url, specifier, marker = details
    return ParsedRequirement(
        name=name, url=url, extras=extras, specifier=specifier, marker=marker
    )
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT WS? URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?

    Returns ``(url, specifier, marker)``. Exactly one of ``url`` and
    ``specifier`` can be non-empty; ``marker`` is None when no marker follows.
    """
    if not tokenizer.check("AT"):
        # Version-specifier form.
        start = tokenizer.position
        version_text = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return ("", version_text, None)

        if version_text:
            context = "version specifier"
        else:
            context = "name and no valid version specifier"
        trailing_marker = _parse_requirement_marker(
            tokenizer, span_start=start, after=context
        )
        return ("", version_text, trailing_marker)

    # Direct-reference form: "@ URL".
    tokenizer.read()
    tokenizer.consume("WS")

    start = tokenizer.position
    location = tokenizer.expect("URL", expected="URL after @").text
    if tokenizer.check("END", peek=True):
        return (location, "", None)

    # A marker may only follow the URL when separated from it by whitespace.
    tokenizer.expect("WS", expected="whitespace after URL")

    # The input might end after whitespace.
    if tokenizer.check("END", peek=True):
        return (location, "", None)

    trailing_marker = _parse_requirement_marker(
        tokenizer, span_start=start, after="URL and whitespace"
    )
    return (location, "", trailing_marker)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?

    ``span_start`` and ``after`` are only used to build the syntax error that
    is raised when the next token is not a semicolon.
    """
    if tokenizer.check("SEMICOLON"):
        tokenizer.read()
    else:
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )

    parsed = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return parsed
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?

    Returns an empty list when no opening bracket is present.
    """
    has_bracket = tokenizer.check("LEFT_BRACKET", peek=True)
    if has_bracket:
        with tokenizer.enclosing_tokens(
            "LEFT_BRACKET", "RIGHT_BRACKET", around="extras"
        ):
            tokenizer.consume("WS")
            names = _parse_extras_list(tokenizer)
            tokenizer.consume("WS")
        return names

    return []
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*

    Returns the extra names in source order; an empty list when the first
    token is not an identifier.
    """
    if not tokenizer.check("IDENTIFIER"):
        return []

    names: List[str] = [tokenizer.read().text]

    while True:
        tokenizer.consume("WS")

        # Two names in a row means the separating comma is missing.
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        if not tokenizer.check("COMMA"):
            return names

        tokenizer.read()
        tokenizer.consume("WS")
        names.append(
            tokenizer.expect("IDENTIFIER", expected="extra name after comma").text
        )
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?

    The surrounding parentheses are optional; when an opening one is read,
    ``enclosing_tokens`` requires the matching closing one.
    """
    enclosure = tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS", "RIGHT_PARENTHESIS", around="version specifier"
    )
    with enclosure:
        tokenizer.consume("WS")
        version_text = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return version_text
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?

    Returns the matched specifiers and commas joined together with the
    whitespace between them dropped; an empty string when nothing matches.
    """
    pieces: List[str] = []

    while tokenizer.check("SPECIFIER"):
        clause_start = tokenizer.position
        pieces.append(tokenizer.read().text)

        # Leftover ".*" or "+local" text right after a specifier means the
        # SPECIFIER rule did not accept it for the operator that was used.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=clause_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=clause_start,
                span_end=tokenizer.position,
            )

        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        pieces.append(tokenizer.read().text)
        tokenizer.consume("WS")

    return "".join(pieces)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
# --------------------------------------------------------------------------------------
|
| 252 |
+
# Recursive descent parser for marker expression
|
| 253 |
+
# --------------------------------------------------------------------------------------
|
| 254 |
+
def parse_marker(source: str) -> MarkerList:
    """Parse the marker expression ``source`` into a nested marker list."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_full_marker(tokenizer)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    """Parse a marker and require that it is followed by the end of input."""
    parsed = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return parsed
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*

    Returns a flat list alternating atoms and the BOOLOP text between them;
    a marker made of a single atom yields a one-element list.
    """
    sequence = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        boolop_text = tokenizer.read().text
        sequence.append(boolop_text)
        sequence.append(_parse_marker_atom(tokenizer))
    return sequence
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?

    A parenthesised atom is returned as a nested list (the result of
    ``_parse_marker``); any other atom is a single marker item tuple.
    """
    tokenizer.consume("WS")

    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        atom: MarkerAtom = _parse_marker_item(tokenizer)
        tokenizer.consume("WS")
        return atom

    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS", "RIGHT_PARENTHESIS", around="marker expression"
    ):
        tokenizer.consume("WS")
        atom = _parse_marker(tokenizer)
        tokenizer.consume("WS")

    tokenizer.consume("WS")
    return atom
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?

    Returns the ``(left, operator, right)`` triple.
    """
    tokenizer.consume("WS")
    left = _parse_marker_var(tokenizer)

    tokenizer.consume("WS")
    comparison = _parse_marker_op(tokenizer)

    tokenizer.consume("WS")
    right = _parse_marker_var(tokenizer)

    tokenizer.consume("WS")
    return (left, comparison, right)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING

    Dots in a variable name are replaced by underscores before the name is
    passed to ``process_env_var``.
    """
    if tokenizer.check("VARIABLE"):
        variable_name = tokenizer.read().text.replace(".", "_")
        return process_env_var(variable_name)

    if tokenizer.check("QUOTED_STRING"):
        quoted_text = tokenizer.read().text
        return process_python_str(quoted_text)

    tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def process_env_var(env_var: str) -> Variable:
    """Wrap ``env_var`` in a Variable node.

    ``python_implementation`` is mapped to ``platform_python_implementation``;
    every other name is kept as given.
    """
    canonical = "platform_python_implementation"
    is_alias = env_var in (canonical, "python_implementation")
    return Variable(canonical if is_alias else env_var)
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def process_python_str(python_str: str) -> Value:
    """Evaluate a quoted string literal and wrap its text in a Value node.

    The literal is evaluated with ``ast.literal_eval`` and the result is
    converted with ``str``.
    """
    return Value(str(ast.literal_eval(python_str)))
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT WS IN | OP

    ``not in`` requires whitespace between the two keywords and is returned
    as the single operator "not in".
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")

    if tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")

    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)

    tokenizer.raise_syntax_error(
        "Expected marker operator, one of "
        "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_structures.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
| 2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
| 3 |
+
# for complete details.
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class InfinityType:
    """Sentinel that orders above every other object.

    The ordering methods return fixed answers regardless of the operand:
    ``>`` and ``>=`` are always True, ``<`` and ``<=`` are always False.
    Equality holds only against another instance of the same class.
    """

    def __repr__(self) -> str:
        return "Infinity"

    def __hash__(self) -> int:
        # Every instance has the same repr, so every instance hashes alike.
        return hash(repr(self))

    def __eq__(self, other: object) -> bool:
        return isinstance(other, type(self))

    def __lt__(self, other: object) -> bool:
        return False

    def __le__(self, other: object) -> bool:
        return False

    def __gt__(self, other: object) -> bool:
        return True

    def __ge__(self, other: object) -> bool:
        return True

    def __neg__(self: object) -> "NegativeInfinityType":
        # Negation yields the module's negative-infinity singleton.
        return NegativeInfinity


Infinity = InfinityType()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class NegativeInfinityType:
    """Sentinel that orders below every other object.

    The ordering methods return fixed answers regardless of the operand:
    ``<`` and ``<=`` are always True, ``>`` and ``>=`` are always False.
    Equality holds only against another instance of the same class.
    """

    def __repr__(self) -> str:
        return "-Infinity"

    def __hash__(self) -> int:
        # Every instance has the same repr, so every instance hashes alike.
        return hash(repr(self))

    def __eq__(self, other: object) -> bool:
        return isinstance(other, type(self))

    def __lt__(self, other: object) -> bool:
        return True

    def __le__(self, other: object) -> bool:
        return True

    def __gt__(self, other: object) -> bool:
        return False

    def __ge__(self, other: object) -> bool:
        return False

    def __neg__(self: object) -> InfinityType:
        # Negation yields the module's positive-infinity singleton.
        return Infinity


NegativeInfinity = NegativeInfinityType()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/_tokenizer.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import contextlib
|
| 2 |
+
import re
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union
|
| 5 |
+
|
| 6 |
+
from .specifiers import Specifier
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@dataclass
class Token:
    """A single token matched by the tokenizer."""

    # Name of the rule that matched (a key of the rules mapping, e.g. "WS").
    name: str
    # Exact source text matched by that rule.
    text: str
    # Offset in the source string at which the match started.
    position: int
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly.

    Carries the error ``message``, the full ``source`` text and the
    ``span`` (start, end) of the offending region.
    """

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: Tuple[int, int],
    ) -> None:
        self.message = message
        self.source = source
        self.span = span

        # No positional args are handed to Exception; str() is built below.
        super().__init__()

    def __str__(self) -> str:
        begin = self.span[0]
        finish = self.span[1]
        # Pad up to the span start, underline the span with "~", end with "^".
        underline = " " * begin + "~" * (finish - begin) + "^"
        # NOTE(review): separator reproduced as displayed (newline + one
        # space); the view this was taken from may have collapsed repeated
        # spaces -- confirm against the upstream file.
        return "\n ".join([self.message, self.source, underline])
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Token name -> pattern. Tokenizer compiles every entry and looks a pattern up
# by name when asked to check for that token at the current position, so the
# order of the entries does not decide which rule matches.
DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    # Single- or double-quoted text. There is no escape handling: a string
    # cannot contain the quote character that delimits it.
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    # Longer operators come first in the alternation so "===" is not read as "==".
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    # Recognised environment marker names; some accept "." or "_" as separator.
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    # Operator + version pattern, reused from the Specifier class.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    # Everything up to the next space or tab.
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    # Only spaces and tabs count as whitespace.
    "WS": r"[ \t]+",
    "END": r"$",
}
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class Tokenizer:
    """Context-sensitive token parsing.

    The caller asks for a specific token by name; the matching rule is tried
    at the current position only. A successful non-peek ``check`` stores the
    token in ``next_token`` and ``read`` then advances past it.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: "Dict[str, Union[str, re.Pattern[str]]]",
    ) -> None:
        self.source = source
        # re.compile accepts both pattern strings and already-compiled patterns.
        self.rules: Dict[str, re.Pattern[str]] = {
            rule_name: re.compile(rule) for rule_name, rule in rules.items()
        }
        self.next_token: Optional[Token] = None
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if not self.check(name):
            return
        self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        found = self.rules[name].match(self.source, self.position)
        if found is None:
            return False
        if peek:
            return True

        self.next_token = Token(name, found.group(0), self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        On success the token is read (the position advances) and returned.
        """
        if self.check(name):
            return self.read()
        self.raise_syntax_error(f"Expected {expected}")

    def read(self) -> Token:
        """Consume the next token and return it."""
        pending = self.next_token
        assert pending is not None

        self.next_token = None
        self.position += len(pending.text)

        return pending

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: Optional[int] = None,
        span_end: Optional[int] = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        Either end of the span defaults to the current position.
        """
        begin = span_start if span_start is not None else self.position
        finish = span_end if span_end is not None else self.position
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=(begin, finish),
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Read an optional opening token and require its closing partner.

        If ``open_token`` is not at the current position the body runs with
        nothing consumed and no closing token is looked for afterwards.
        """
        opened_at: Optional[int] = None
        if self.check(open_token):
            opened_at = self.position
            self.read()

        yield

        if opened_at is not None:
            if not self.check(close_token):
                self.raise_syntax_error(
                    f"Expected matching {close_token} for {open_token}, after {around}",
                    span_start=opened_at,
                )
            self.read()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/markers.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
| 2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
| 3 |
+
# for complete details.
|
| 4 |
+
|
| 5 |
+
import operator
|
| 6 |
+
import os
|
| 7 |
+
import platform
|
| 8 |
+
import sys
|
| 9 |
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
| 10 |
+
|
| 11 |
+
from ._parser import (
|
| 12 |
+
MarkerAtom,
|
| 13 |
+
MarkerList,
|
| 14 |
+
Op,
|
| 15 |
+
Value,
|
| 16 |
+
Variable,
|
| 17 |
+
)
|
| 18 |
+
from ._parser import (
|
| 19 |
+
parse_marker as _parse_marker,
|
| 20 |
+
)
|
| 21 |
+
from ._tokenizer import ParserSyntaxError
|
| 22 |
+
from .specifiers import InvalidSpecifier, Specifier
|
| 23 |
+
from .utils import canonicalize_name
|
| 24 |
+
|
| 25 |
+
__all__ = [
|
| 26 |
+
"InvalidMarker",
|
| 27 |
+
"UndefinedComparison",
|
| 28 |
+
"UndefinedEnvironmentName",
|
| 29 |
+
"Marker",
|
| 30 |
+
"default_environment",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
Operator = Callable[[str, str], bool]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class InvalidMarker(ValueError):
    """
    An invalid marker was found; users should refer to PEP 508.
    """
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class UndefinedComparison(ValueError):
    """
    An invalid operation was attempted on a value that doesn't support it.

    Raised by ``_eval_op`` when the operator and right-hand side do not form
    a valid version specifier and the operator has no entry in ``_operators``.
    """
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class UndefinedEnvironmentName(ValueError):
    """
    An attempt was made to use a name that does not exist inside of the
    environment.
    """
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _normalize_extra_values(results: Any) -> Any:
    """
    Normalize extra values.

    Only the first element of ``results`` is inspected. If it is a marker
    item comparing the ``extra`` variable with something, the other operand
    is replaced by a Value holding its canonicalized name. ``results`` is
    modified in place and returned.
    """
    head = results[0]
    if not isinstance(head, tuple):
        return results

    left, op, right = head
    if isinstance(left, Variable) and left.value == "extra":
        right = Value(canonicalize_name(right.value))
    elif isinstance(right, Variable) and right.value == "extra":
        left = Value(canonicalize_name(left.value))

    results[0] = (left, op, right)
    return results
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _format_marker(
|
| 72 |
+
marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True
|
| 73 |
+
) -> str:
|
| 74 |
+
assert isinstance(marker, (list, tuple, str))
|
| 75 |
+
|
| 76 |
+
# Sometimes we have a structure like [[...]] which is a single item list
|
| 77 |
+
# where the single item is itself it's own list. In that case we want skip
|
| 78 |
+
# the rest of this function so that we don't get extraneous () on the
|
| 79 |
+
# outside.
|
| 80 |
+
if (
|
| 81 |
+
isinstance(marker, list)
|
| 82 |
+
and len(marker) == 1
|
| 83 |
+
and isinstance(marker[0], (list, tuple))
|
| 84 |
+
):
|
| 85 |
+
return _format_marker(marker[0])
|
| 86 |
+
|
| 87 |
+
if isinstance(marker, list):
|
| 88 |
+
inner = (_format_marker(m, first=False) for m in marker)
|
| 89 |
+
if first:
|
| 90 |
+
return " ".join(inner)
|
| 91 |
+
else:
|
| 92 |
+
return "(" + " ".join(inner) + ")"
|
| 93 |
+
elif isinstance(marker, tuple):
|
| 94 |
+
return " ".join([m.serialize() for m in marker])
|
| 95 |
+
else:
|
| 96 |
+
return marker
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# Plain comparison functions keyed by operator text. _eval_op falls back to
# this table when the operator and right-hand side do not form a valid version
# specifier. "~=" and "===" have no entry here.
_operators: Dict[str, Operator] = {
    "in": lambda lhs, rhs: lhs in rhs,
    "not in": lambda lhs, rhs: lhs not in rhs,
    "<": operator.lt,
    "<=": operator.le,
    "==": operator.eq,
    "!=": operator.ne,
    ">=": operator.ge,
    ">": operator.gt,
}
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
    """Evaluate ``lhs <op> rhs``.

    If the operator and ``rhs`` form a valid version specifier, the result is
    whether that specifier contains ``lhs`` (pre-releases allowed). Otherwise
    the plain comparison registered in ``_operators`` is applied.

    Raises UndefinedComparison when neither interpretation is available.
    """
    symbol = op.serialize()

    try:
        spec: Optional[Specifier] = Specifier("".join([symbol, rhs]))
    except InvalidSpecifier:
        spec = None

    if spec is not None:
        return spec.contains(lhs, prereleases=True)

    compare: Optional[Operator] = _operators.get(symbol)
    if compare is None:
        raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")

    return compare(lhs, rhs)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def _normalize(*values: str, key: str) -> Tuple[str, ...]:
|
| 127 |
+
# PEP 685 – Comparison of extra names for optional distribution dependencies
|
| 128 |
+
# https://peps.python.org/pep-0685/
|
| 129 |
+
# > When comparing extra names, tools MUST normalize the names being
|
| 130 |
+
# > compared using the semantics outlined in PEP 503 for names
|
| 131 |
+
if key == "extra":
|
| 132 |
+
return tuple(canonicalize_name(v) for v in values)
|
| 133 |
+
|
| 134 |
+
# other environment markers don't have such standards
|
| 135 |
+
return values
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool:
    """Evaluate a parsed marker list against ``environment``.

    Results are collected into groups: "and" keeps adding to the current
    group, "or" starts a new one. The marker is true when every result in at
    least one group is true. Variable names are looked up with
    ``environment[...]``, so a missing name raises KeyError.
    """
    or_groups: List[List[bool]] = [[]]

    for entry in markers:
        assert isinstance(entry, (list, tuple, str))

        if isinstance(entry, list):
            # Nested marker list: evaluate it recursively as one result.
            or_groups[-1].append(_evaluate_markers(entry, environment))
            continue

        if not isinstance(entry, tuple):
            assert entry in ["and", "or"]
            if entry == "or":
                or_groups.append([])
            continue

        left, op, right = entry

        # Whichever side is not a Variable supplies the literal; the other
        # side's value is used as the environment key.
        if isinstance(left, Variable):
            env_key = left.value
            left_value = environment[env_key]
            right_value = right.value
        else:
            left_value = left.value
            env_key = right.value
            right_value = environment[env_key]

        left_value, right_value = _normalize(left_value, right_value, key=env_key)
        or_groups[-1].append(_eval_op(left_value, op, right_value))

    return any(all(group) for group in or_groups)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def format_full_version(info: "sys._version_info") -> str:
    """Format a version-info object as ``major.minor.micro``.

    For any release level other than "final", the first letter of the level
    and the serial number are appended (e.g. ``3.12.1b2``).
    """
    base = f"{info.major}.{info.minor}.{info.micro}"
    level = info.releaselevel
    if level == "final":
        return base
    return base + level[0] + str(info.serial)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def default_environment() -> Dict[str, str]:
    """Build the marker environment for the running interpreter.

    Values are read from ``sys``, ``os`` and ``platform``.
    """
    implementation = sys.implementation
    major_minor = platform.python_version_tuple()[:2]

    return {
        "implementation_name": implementation.name,
        "implementation_version": format_full_version(implementation.version),
        "os_name": os.name,
        "platform_machine": platform.machine(),
        "platform_release": platform.release(),
        "platform_system": platform.system(),
        "platform_version": platform.version(),
        "python_full_version": platform.python_version(),
        "platform_python_implementation": platform.python_implementation(),
        "python_version": ".".join(major_minor),
        "sys_platform": sys.platform,
    }
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
class Marker:
    """A parsed environment marker that can be printed, compared and evaluated."""

    def __init__(self, marker: str) -> None:
        """Parse *marker* and keep its normalized structure.

        :param marker: The marker expression text.
        :raises InvalidMarker: If the parser rejects the marker text.
        """
        # Note: packaging.requirements.Requirement creates Marker objects
        # without calling this constructor. Any logic added here has to be
        # mirrored/adapted in Requirement as well.
        try:
            self._markers = _normalize_extra_values(_parse_marker(marker))
        except ParserSyntaxError as e:
            raise InvalidMarker(str(e)) from e
        # `_markers` is described by a recursive type:
        #   MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
        #
        # A comparison is a (lhs, op, rhs) tuple, the strings "and" / "or"
        # join neighbouring entries, and a nested list is a sub-expression.
        # For example, an expression such as
        #   python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
        # is held as:
        #   [
        #       (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
        #       'or',
        #       [
        #           (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
        #           'and',
        #           (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
        #       ]
        #   ]

    def __str__(self) -> str:
        """Return the marker rendered back to text."""
        return _format_marker(self._markers)

    def __repr__(self) -> str:
        """Return a debugging representation wrapping the marker text."""
        return f"<Marker('{self}')>"

    def __hash__(self) -> int:
        """Hash on the class name together with the rendered marker text."""
        key = (self.__class__.__name__, str(self))
        return hash(key)

    def __eq__(self, other: Any) -> bool:
        """Compare two markers by their rendered text.

        Returns ``NotImplemented`` when *other* is not a :class:`Marker`.
        """
        if isinstance(other, Marker):
            return str(self) == str(other)
        return NotImplemented

    def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool:
        """Evaluate a marker.

        Return the boolean from evaluating the given marker against the
        environment. environment is an optional argument to override all or
        part of the determined environment.

        The environment is determined from the current Python process.
        """
        effective = default_environment()
        effective["extra"] = ""
        if environment is not None:
            effective.update(environment)
            # Callers used to be allowed to pass ``extra=None``; treat that
            # as the empty string for backwards compatibility.
            if effective["extra"] is None:
                effective["extra"] = ""

        return _evaluate_markers(self._markers, effective)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/specifiers.py
ADDED
|
@@ -0,0 +1,1011 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
| 2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
| 3 |
+
# for complete details.
|
| 4 |
+
"""
|
| 5 |
+
.. testsetup::
|
| 6 |
+
|
| 7 |
+
from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier
|
| 8 |
+
from packaging.version import Version
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import abc
|
| 12 |
+
import itertools
|
| 13 |
+
import re
|
| 14 |
+
from typing import Callable, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union
|
| 15 |
+
|
| 16 |
+
from .utils import canonicalize_version
|
| 17 |
+
from .version import Version
|
| 18 |
+
|
| 19 |
+
UnparsedVersion = Union[Version, str]
|
| 20 |
+
UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion)
|
| 21 |
+
CallableOperator = Callable[[Version, str], bool]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _coerce_version(version: UnparsedVersion) -> Version:
    """Return *version* as a :class:`Version`.

    An existing :class:`Version` instance is returned unchanged; anything
    else is passed to the :class:`Version` constructor.
    """
    return version if isinstance(version, Version) else Version(version)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class InvalidSpecifier(ValueError):
    """
    Error raised when a :class:`Specifier` is created from a specifier
    string that is not valid.

    >>> Specifier("lolwat")
    Traceback (most recent call last):
        ...
    packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat'
    """
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class BaseSpecifier(metaclass=abc.ABCMeta):
    """Abstract interface that every specifier-like object implements."""

    @abc.abstractmethod
    def __str__(self) -> str:
        """
        Return the string form of this Specifier-like object.

        The result should be representative of the Specifier itself.
        """

    @abc.abstractmethod
    def __hash__(self) -> int:
        """
        Return a hash value for this Specifier-like object.
        """

    @abc.abstractmethod
    def __eq__(self, other: object) -> bool:
        """
        Return whether the two Specifier-like objects are equal.

        :param other: The other object to check against.
        """

    @property
    @abc.abstractmethod
    def prereleases(self) -> Optional[bool]:
        """Whether pre-releases as a whole are allowed.

        Set this to ``True`` or ``False`` to explicitly enable or disable
        prereleases, or to ``None`` (the default) to use default semantics.
        """

    @prereleases.setter
    def prereleases(self, value: bool) -> None:
        """Setter for :attr:`prereleases`.

        :param value: The value to set.
        """

    @abc.abstractmethod
    def contains(self, item: str, prereleases: Optional[bool] = None) -> bool:
        """
        Determine whether the given item is contained within this specifier.
        """

    @abc.abstractmethod
    def filter(
        self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None
    ) -> Iterator[UnparsedVersionVar]:
        """
        Filter an iterable of items, keeping only those that are contained
        within this specifier.
        """
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class Specifier(BaseSpecifier):
|
| 98 |
+
"""This class abstracts handling of version specifiers.
|
| 99 |
+
|
| 100 |
+
.. tip::
|
| 101 |
+
|
| 102 |
+
It is generally not required to instantiate this manually. You should instead
|
| 103 |
+
prefer to work with :class:`SpecifierSet` instead, which can parse
|
| 104 |
+
comma-separated version specifiers (which is what package metadata contains).
|
| 105 |
+
"""
|
| 106 |
+
|
| 107 |
+
_operator_regex_str = r"""
|
| 108 |
+
(?P<operator>(~=|==|!=|<=|>=|<|>|===))
|
| 109 |
+
"""
|
| 110 |
+
_version_regex_str = r"""
|
| 111 |
+
(?P<version>
|
| 112 |
+
(?:
|
| 113 |
+
# The identity operators allow for an escape hatch that will
|
| 114 |
+
# do an exact string match of the version you wish to install.
|
| 115 |
+
# This will not be parsed by PEP 440 and we cannot determine
|
| 116 |
+
# any semantic meaning from it. This operator is discouraged
|
| 117 |
+
# but included entirely as an escape hatch.
|
| 118 |
+
(?<====) # Only match for the identity operator
|
| 119 |
+
\s*
|
| 120 |
+
[^\s;)]* # The arbitrary version can be just about anything,
|
| 121 |
+
# we match everything except for whitespace, a
|
| 122 |
+
# semi-colon for marker support, and a closing paren
|
| 123 |
+
# since versions can be enclosed in them.
|
| 124 |
+
)
|
| 125 |
+
|
|
| 126 |
+
(?:
|
| 127 |
+
# The (non)equality operators allow for wild card and local
|
| 128 |
+
# versions to be specified so we have to define these two
|
| 129 |
+
# operators separately to enable that.
|
| 130 |
+
(?<===|!=) # Only match for equals and not equals
|
| 131 |
+
|
| 132 |
+
\s*
|
| 133 |
+
v?
|
| 134 |
+
(?:[0-9]+!)? # epoch
|
| 135 |
+
[0-9]+(?:\.[0-9]+)* # release
|
| 136 |
+
|
| 137 |
+
# You cannot use a wild card and a pre-release, post-release, a dev or
|
| 138 |
+
# local version together so group them with a | and make them optional.
|
| 139 |
+
(?:
|
| 140 |
+
\.\* # Wild card syntax of .*
|
| 141 |
+
|
|
| 142 |
+
(?: # pre release
|
| 143 |
+
[-_\.]?
|
| 144 |
+
(alpha|beta|preview|pre|a|b|c|rc)
|
| 145 |
+
[-_\.]?
|
| 146 |
+
[0-9]*
|
| 147 |
+
)?
|
| 148 |
+
(?: # post release
|
| 149 |
+
(?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
|
| 150 |
+
)?
|
| 151 |
+
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
|
| 152 |
+
(?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local
|
| 153 |
+
)?
|
| 154 |
+
)
|
| 155 |
+
|
|
| 156 |
+
(?:
|
| 157 |
+
# The compatible operator requires at least two digits in the
|
| 158 |
+
# release segment.
|
| 159 |
+
(?<=~=) # Only match for the compatible operator
|
| 160 |
+
|
| 161 |
+
\s*
|
| 162 |
+
v?
|
| 163 |
+
(?:[0-9]+!)? # epoch
|
| 164 |
+
[0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *)
|
| 165 |
+
(?: # pre release
|
| 166 |
+
[-_\.]?
|
| 167 |
+
(alpha|beta|preview|pre|a|b|c|rc)
|
| 168 |
+
[-_\.]?
|
| 169 |
+
[0-9]*
|
| 170 |
+
)?
|
| 171 |
+
(?: # post release
|
| 172 |
+
(?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
|
| 173 |
+
)?
|
| 174 |
+
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
|
| 175 |
+
)
|
| 176 |
+
|
|
| 177 |
+
(?:
|
| 178 |
+
# All other operators only allow a sub set of what the
|
| 179 |
+
# (non)equality operators do. Specifically they do not allow
|
| 180 |
+
# local versions to be specified nor do they allow the prefix
|
| 181 |
+
# matching wild cards.
|
| 182 |
+
(?<!==|!=|~=) # We have special cases for these
|
| 183 |
+
# operators so we want to make sure they
|
| 184 |
+
# don't match here.
|
| 185 |
+
|
| 186 |
+
\s*
|
| 187 |
+
v?
|
| 188 |
+
(?:[0-9]+!)? # epoch
|
| 189 |
+
[0-9]+(?:\.[0-9]+)* # release
|
| 190 |
+
(?: # pre release
|
| 191 |
+
[-_\.]?
|
| 192 |
+
(alpha|beta|preview|pre|a|b|c|rc)
|
| 193 |
+
[-_\.]?
|
| 194 |
+
[0-9]*
|
| 195 |
+
)?
|
| 196 |
+
(?: # post release
|
| 197 |
+
(?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
|
| 198 |
+
)?
|
| 199 |
+
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
|
| 200 |
+
)
|
| 201 |
+
)
|
| 202 |
+
"""
|
| 203 |
+
|
| 204 |
+
_regex = re.compile(
|
| 205 |
+
r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$",
|
| 206 |
+
re.VERBOSE | re.IGNORECASE,
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
_operators = {
|
| 210 |
+
"~=": "compatible",
|
| 211 |
+
"==": "equal",
|
| 212 |
+
"!=": "not_equal",
|
| 213 |
+
"<=": "less_than_equal",
|
| 214 |
+
">=": "greater_than_equal",
|
| 215 |
+
"<": "less_than",
|
| 216 |
+
">": "greater_than",
|
| 217 |
+
"===": "arbitrary",
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None:
    """Initialize a Specifier instance.

    :param spec:
        The string representation of a specifier; it is parsed and the
        operator and version parts are stored with surrounding whitespace
        removed.
    :param prereleases:
        Whether this specifier should accept prerelease versions. The
        default of ``None`` means the answer is derived from the specifier
        itself.
    :raises InvalidSpecifier:
        If the given specifier is invalid (i.e. bad syntax).
    """
    parsed = self._regex.search(spec)
    if parsed is None:
        raise InvalidSpecifier(f"Invalid specifier: '{spec}'")

    op_text = parsed.group("operator").strip()
    version_text = parsed.group("version").strip()
    self._spec: Tuple[str, str] = (op_text, version_text)

    # Remember the explicit prerelease preference (may be None).
    self._prereleases = prereleases
|
| 244 |
+
|
| 245 |
+
# https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
|
| 246 |
+
@property  # type: ignore[override]
def prereleases(self) -> bool:
    """Whether this specifier accepts prerelease versions.

    An explicitly configured value always wins. Otherwise the answer is
    ``True`` only when the operator is one of ``==``, ``>=``, ``<=``,
    ``~=`` or ``===`` and the version named in the specifier is itself a
    prerelease.
    """
    explicit = self._prereleases
    if explicit is not None:
        return explicit

    op, raw_version = self._spec
    if op not in ("==", ">=", "<=", "~=", "==="):
        return False

    # "==X.*" carries a wildcard suffix that must go before parsing.
    if op == "==" and raw_version.endswith(".*"):
        raw_version = raw_version[:-2]

    return bool(Version(raw_version).is_prerelease)

@prereleases.setter
def prereleases(self, value: bool) -> None:
    """Store an explicit prerelease preference for this specifier."""
    self._prereleases = value
|
| 273 |
+
|
| 274 |
+
@property
|
| 275 |
+
def operator(self) -> str:
|
| 276 |
+
"""The operator of this specifier.
|
| 277 |
+
|
| 278 |
+
>>> Specifier("==1.2.3").operator
|
| 279 |
+
'=='
|
| 280 |
+
"""
|
| 281 |
+
return self._spec[0]
|
| 282 |
+
|
| 283 |
+
@property
|
| 284 |
+
def version(self) -> str:
|
| 285 |
+
"""The version of this specifier.
|
| 286 |
+
|
| 287 |
+
>>> Specifier("==1.2.3").version
|
| 288 |
+
'1.2.3'
|
| 289 |
+
"""
|
| 290 |
+
return self._spec[1]
|
| 291 |
+
|
| 292 |
+
def __repr__(self) -> str:
|
| 293 |
+
"""A representation of the Specifier that shows all internal state.
|
| 294 |
+
|
| 295 |
+
>>> Specifier('>=1.0.0')
|
| 296 |
+
<Specifier('>=1.0.0')>
|
| 297 |
+
>>> Specifier('>=1.0.0', prereleases=False)
|
| 298 |
+
<Specifier('>=1.0.0', prereleases=False)>
|
| 299 |
+
>>> Specifier('>=1.0.0', prereleases=True)
|
| 300 |
+
<Specifier('>=1.0.0', prereleases=True)>
|
| 301 |
+
"""
|
| 302 |
+
pre = (
|
| 303 |
+
f", prereleases={self.prereleases!r}"
|
| 304 |
+
if self._prereleases is not None
|
| 305 |
+
else ""
|
| 306 |
+
)
|
| 307 |
+
|
| 308 |
+
return f"<{self.__class__.__name__}({str(self)!r}{pre})>"
|
| 309 |
+
|
| 310 |
+
def __str__(self) -> str:
|
| 311 |
+
"""A string representation of the Specifier that can be round-tripped.
|
| 312 |
+
|
| 313 |
+
>>> str(Specifier('>=1.0.0'))
|
| 314 |
+
'>=1.0.0'
|
| 315 |
+
>>> str(Specifier('>=1.0.0', prereleases=False))
|
| 316 |
+
'>=1.0.0'
|
| 317 |
+
"""
|
| 318 |
+
return "{}{}".format(*self._spec)
|
| 319 |
+
|
| 320 |
+
@property
def _canonical_spec(self) -> Tuple[str, str]:
    """The ``(operator, version)`` pair with the version canonicalized.

    Trailing zeros are stripped from the version for every operator except
    ``~=``.
    """
    op, raw_version = self._spec[0], self._spec[1]
    strip_zeros = op != "~="
    return op, canonicalize_version(raw_version, strip_trailing_zero=strip_zeros)
|
| 327 |
+
|
| 328 |
+
def __hash__(self) -> int:
    """Hash the canonical ``(operator, version)`` pair."""
    canonical = self._canonical_spec
    return hash(canonical)
|
| 330 |
+
|
| 331 |
+
def __eq__(self, other: object) -> bool:
    """Return whether the two Specifier-like objects are equal.

    :param other: The other object to check against. A string is first
        parsed with this class; if it is not a valid specifier, or if
        *other* is neither a string nor an instance of this class,
        ``NotImplemented`` is returned.

    The value of :attr:`prereleases` is ignored.

    >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0")
    True
    >>> (Specifier("==1.2.3", prereleases=False) ==
    ...  Specifier("==1.2.3", prereleases=True))
    True
    >>> Specifier("==1.2.3") == "==1.2.3"
    True
    >>> Specifier("==1.2.3") == Specifier("==1.2.4")
    False
    >>> Specifier("==1.2.3") == Specifier("~=1.2.3")
    False
    """
    own_class = self.__class__
    if isinstance(other, str):
        try:
            other = own_class(str(other))
        except InvalidSpecifier:
            return NotImplemented
    elif not isinstance(other, own_class):
        return NotImplemented

    # Equality is decided on the canonical (operator, version) pairs.
    return self._canonical_spec == other._canonical_spec
|
| 359 |
+
|
| 360 |
+
def _get_operator(self, op: str) -> CallableOperator:
    """Return the bound ``_compare_*`` method registered for operator *op*.

    :raises KeyError: If *op* is not a key of ``_operators``.
    """
    method_name = f"_compare_{self._operators[op]}"
    handler: CallableOperator = getattr(self, method_name)
    return handler
|
| 365 |
+
|
| 366 |
+
def _compare_compatible(self, prospective: Version, spec: str) -> bool:
    """Implement the ``~=`` (compatible release) operator.

    A compatible release is the combination of ``>=`` and a prefix ``==``
    match: ``~=2.2`` is equivalent to ``>=2.2,==2.*``. The check is
    therefore delegated to those two comparisons.
    """
    # Keep only the leading non-suffix segments and drop the last of them
    # to obtain the prefix that must match.
    leading_segments = list(itertools.takewhile(_is_not_suffix, _version_split(spec)))
    prefix = _version_join(leading_segments[:-1])

    # Turn the prefix into wildcard notation.
    prefix += ".*"

    at_least_spec = self._get_operator(">=")(prospective, spec)
    return at_least_spec and self._get_operator("==")(prospective, prefix)
|
| 385 |
+
|
| 386 |
+
def _compare_equal(self, prospective: Version, spec: str) -> bool:
    """Implement the ``==`` operator, including ``.*`` prefix matching."""
    if not spec.endswith(".*"):
        # Plain equality against the parsed spec version.
        spec_version = Version(spec)

        # A spec without a local segment is compared against the
        # prospective version with its local segment removed too.
        if not spec_version.local:
            prospective = Version(prospective.public)

        return prospective == spec_version

    # Prefix matching ignores the local segment of the prospective version.
    normalized_prospective = canonicalize_version(
        prospective.public, strip_trailing_zero=False
    )
    # Normalize the spec without its trailing ".*".
    normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)

    # Break both versions into comparable segments.
    split_spec = _version_split(normalized_spec)
    split_prospective = _version_split(normalized_prospective)

    # Zero-pad the prospective version first so that truncating it below
    # yields the correct shortened version.
    padded_prospective, _ = _pad_version(split_prospective, split_spec)

    # The spec matches when it is a prefix of the prospective version.
    return padded_prospective[: len(split_spec)] == split_spec
|
| 425 |
+
|
| 426 |
+
def _compare_not_equal(self, prospective: Version, spec: str) -> bool:
    """Implement ``!=`` as the negation of the ``==`` comparison."""
    matches = self._compare_equal(prospective, spec)
    return not matches
|
| 428 |
+
|
| 429 |
+
def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool:
    """Implement the ``<=`` operator."""
    # NB: Local version identifiers are NOT permitted in the version
    # specifier, so the local label is always dropped from the prospective
    # version before comparing.
    public_candidate = Version(prospective.public)
    upper_bound = Version(spec)
    return public_candidate <= upper_bound
|
| 434 |
+
|
| 435 |
+
def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool:
    """Implement the ``>=`` operator."""
    # NB: Local version identifiers are NOT permitted in the version
    # specifier, so the local label is always dropped from the prospective
    # version before comparing.
    public_candidate = Version(prospective.public)
    lower_bound = Version(spec)
    return public_candidate >= lower_bound
|
| 440 |
+
|
| 441 |
+
def _compare_less_than(self, prospective: Version, spec_str: str) -> bool:
    """Implement the exclusive ``<`` operator.

    Unless the specifier itself names a pre-release, a pre-release of the
    version mentioned in the specifier is rejected (e.g. ``<3.1`` should
    not match ``3.1.dev0``, but should match ``3.0.dev0``).
    """
    bound = Version(spec_str)

    # Anything not strictly below the bound is rejected immediately.
    if not prospective < bound:
        return False

    # The pre-release exclusion only applies when the bound is a final
    # release and the candidate is a pre-release.
    if bound.is_prerelease or not prospective.is_prerelease:
        return True

    same_release = Version(prospective.base_version) == Version(bound.base_version)
    return not same_release
|
| 464 |
+
|
| 465 |
+
def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool:
    """Implement the exclusive ``>`` operator.

    Unless the specifier itself names a post-release, a post-release of the
    version mentioned in the specifier is rejected (e.g. ``>3.1`` should
    not match ``3.1.post0``, but should match ``3.2.post0``). A local
    version of the version mentioned in the specifier is rejected as well.
    """
    bound = Version(spec_str)

    # Anything not strictly above the bound is rejected immediately.
    if not prospective > bound:
        return False

    def shares_base_version() -> bool:
        # True when candidate and bound have the same base version.
        return Version(prospective.base_version) == Version(bound.base_version)

    # Reject post-releases of the bound's own version.
    if not bound.is_postrelease and prospective.is_postrelease and shares_base_version():
        return False

    # A local version of the bound's version is technically greater than
    # the bound, but must not match.
    if prospective.local is not None and shares_base_version():
        return False

    return True
|
| 494 |
+
|
| 495 |
+
def _compare_arbitrary(self, prospective: Version, spec: str) -> bool:
    """Implement ``===`` as a case-insensitive comparison of the string forms."""
    candidate_text = str(prospective).lower()
    spec_text = str(spec).lower()
    return candidate_text == spec_text
|
| 497 |
+
|
| 498 |
+
def __contains__(self, item: Union[str, Version]) -> bool:
    """Return whether the item is contained in this specifier.

    :param item: The item to check for.

    This backs the ``in`` operator and gives the same answer as
    :meth:`contains` called without a ``prereleases`` argument.

    >>> "1.2.3" in Specifier(">=1.2.3")
    True
    >>> Version("1.2.3") in Specifier(">=1.2.3")
    True
    >>> "1.0.0" in Specifier(">=1.2.3")
    False
    >>> "1.3.0a1" in Specifier(">=1.2.3")
    False
    >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True)
    True
    """
    is_member = self.contains(item)
    return is_member
|
| 518 |
+
|
| 519 |
+
def contains(
    self, item: UnparsedVersion, prereleases: Optional[bool] = None
) -> bool:
    """Return whether the item is contained in this specifier.

    :param item:
        The item to check for, which can be a version string or a
        :class:`Version` instance.
    :param prereleases:
        Whether to match prereleases with this Specifier. If set to
        ``None`` (the default), :attr:`prereleases` decides whether
        prereleases are allowed.

    >>> Specifier(">=1.2.3").contains("1.2.3")
    True
    >>> Specifier(">=1.2.3").contains(Version("1.2.3"))
    True
    >>> Specifier(">=1.2.3").contains("1.0.0")
    False
    >>> Specifier(">=1.2.3").contains("1.3.0a1")
    False
    >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1")
    True
    >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True)
    True
    """
    # Fall back to the specifier's own preference when none is given.
    allow_prereleases = self.prereleases if prereleases is None else prereleases

    # Work on a Version so that e.g. "2.0" in Specifier(">=2") is supported.
    candidate = _coerce_version(item)

    # A prerelease can be rejected outright when prereleases are not allowed.
    if candidate.is_prerelease and not allow_prereleases:
        return False

    # Run the comparison registered for this specifier's operator.
    compare: CallableOperator = self._get_operator(self.operator)
    return compare(candidate, self.version)
|
| 564 |
+
|
| 565 |
+
def filter(
    self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None
) -> Iterator[UnparsedVersionVar]:
    """Yield the items of *iterable* that satisfy this specifier.

    :param iterable:
        Version strings and/or :class:`Version` instances. Matching items
        are yielded unchanged, in their original form.
    :param prereleases:
        Explicit prerelease policy for this call. With ``None`` (the
        default) prereleases are accepted only when :attr:`prereleases`
        is truthy, or as a fallback when nothing else matched.

    Unlike ``filter(Specifier().contains, [...])`` this follows the
    :pep:`440` rule that a prerelease SHOULD be accepted when no other
    version matches the specifier.

    >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"]))
    ['1.3']
    >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")]))
    ['1.2.3', '1.3', <Version('1.4')>]
    >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"]))
    ['1.5a1']
    >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True))
    ['1.3', '1.5a1']
    >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"]))
    ['1.3', '1.5a1']
    """
    # Without an explicit policy, let ``contains`` match prereleases too so
    # they can be held back as fallback candidates below.
    match_prereleases = True if prereleases is None else prereleases

    emitted = False
    deferred = []

    for candidate in iterable:
        parsed = _coerce_version(candidate)

        if not self.contains(parsed, prereleases=match_prereleases):
            continue

        # A matching prerelease that was not asked for is only kept in
        # reserve; it is released if nothing else ends up matching.
        if parsed.is_prerelease and not (prereleases or self.prereleases):
            deferred.append(candidate)
            continue

        emitted = True
        yield candidate

    # Nothing was yielded directly: fall back to the reserved prereleases.
    if not emitted and deferred:
        for candidate in deferred:
            yield candidate
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$")
|
| 628 |
+
|
| 629 |
+
|
| 630 |
+
def _version_split(version: str) -> List[str]:
|
| 631 |
+
"""Split version into components.
|
| 632 |
+
|
| 633 |
+
The split components are intended for version comparison. The logic does
|
| 634 |
+
not attempt to retain the original version string, so joining the
|
| 635 |
+
components back with :func:`_version_join` may not produce the original
|
| 636 |
+
version string.
|
| 637 |
+
"""
|
| 638 |
+
result: List[str] = []
|
| 639 |
+
|
| 640 |
+
epoch, _, rest = version.rpartition("!")
|
| 641 |
+
result.append(epoch or "0")
|
| 642 |
+
|
| 643 |
+
for item in rest.split("."):
|
| 644 |
+
match = _prefix_regex.search(item)
|
| 645 |
+
if match:
|
| 646 |
+
result.extend(match.groups())
|
| 647 |
+
else:
|
| 648 |
+
result.append(item)
|
| 649 |
+
return result
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
def _version_join(components: List[str]) -> str:
|
| 653 |
+
"""Join split version components into a version string.
|
| 654 |
+
|
| 655 |
+
This function assumes the input came from :func:`_version_split`, where the
|
| 656 |
+
first component must be the epoch (either empty or numeric), and all other
|
| 657 |
+
components numeric.
|
| 658 |
+
"""
|
| 659 |
+
epoch, *rest = components
|
| 660 |
+
return f"{epoch}!{'.'.join(rest)}"
|
| 661 |
+
|
| 662 |
+
|
| 663 |
+
def _is_not_suffix(segment: str) -> bool:
|
| 664 |
+
return not any(
|
| 665 |
+
segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post")
|
| 666 |
+
)
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]:
|
| 670 |
+
left_split, right_split = [], []
|
| 671 |
+
|
| 672 |
+
# Get the release segment of our versions
|
| 673 |
+
left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left)))
|
| 674 |
+
right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right)))
|
| 675 |
+
|
| 676 |
+
# Get the rest of our versions
|
| 677 |
+
left_split.append(left[len(left_split[0]) :])
|
| 678 |
+
right_split.append(right[len(right_split[0]) :])
|
| 679 |
+
|
| 680 |
+
# Insert our padding
|
| 681 |
+
left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0])))
|
| 682 |
+
right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0])))
|
| 683 |
+
|
| 684 |
+
return (
|
| 685 |
+
list(itertools.chain.from_iterable(left_split)),
|
| 686 |
+
list(itertools.chain.from_iterable(right_split)),
|
| 687 |
+
)
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
class SpecifierSet(BaseSpecifier):
    """A set of version specifiers that a version must satisfy all at once.

    Build it from a single specifier (``>=3.0``), a comma-separated list of
    specifiers (``>=3.0,!=3.1``), or an empty string for no constraint.
    """

    def __init__(
        self, specifiers: str = "", prereleases: Optional[bool] = None
    ) -> None:
        """Create a SpecifierSet.

        :param specifiers:
            A specifier, or several separated by commas. Each piece is
            stripped of surrounding whitespace and parsed; empty pieces are
            ignored.
        :param prereleases:
            Explicit prerelease policy. ``None`` (the default) derives the
            policy from the individual specifiers.

        :raises InvalidSpecifier:
            If any piece of ``specifiers`` cannot be parsed.
        """
        # Trim every comma-separated piece and drop the ones left empty.
        pieces = (piece.strip() for piece in specifiers.split(","))

        # The parsed specifiers are kept in an immutable, hashable set.
        self._specs = frozenset(Specifier(piece) for piece in pieces if piece)

        # Remember the explicit override (if any) for later policy lookups.
        self._prereleases = prereleases

    @property
    def prereleases(self) -> Optional[bool]:
        # An explicit override always wins.
        forced = self._prereleases
        if forced is not None:
            return forced

        # No specifiers and no override: there is nothing to decide from.
        if not self._specs:
            return None

        # Otherwise prereleases are allowed as soon as one specifier
        # allows them.
        return any(spec.prereleases for spec in self._specs)

    @prereleases.setter
    def prereleases(self, value: bool) -> None:
        self._prereleases = value

    def __repr__(self) -> str:
        """Return a representation exposing the set's internal state.

        The specifiers may appear in a different order than in the string
        the set was built from.

        >>> SpecifierSet('>=1.0.0,!=2.0.0')
        <SpecifierSet('!=2.0.0,>=1.0.0')>
        >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False)
        <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=False)>
        >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True)
        <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=True)>
        """
        # The prerelease flag is shown only when it was set explicitly.
        if self._prereleases is None:
            suffix = ""
        else:
            suffix = f", prereleases={self.prereleases!r}"

        return f"<SpecifierSet({str(self)!r}{suffix})>"

    def __str__(self) -> str:
        """Return the specifiers as a comma-separated, round-trippable string.

        The specifiers are sorted, so their order may differ from the
        string the set was built from.

        >>> str(SpecifierSet(">=1.0.0,!=1.0.1"))
        '!=1.0.1,>=1.0.0'
        >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False))
        '!=1.0.1,>=1.0.0'
        """
        return ",".join(sorted(map(str, self._specs)))

    def __hash__(self) -> int:
        return hash(self._specs)

    def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet":
        """Return a new SpecifierSet holding the specifiers of both operands.

        :param other: Another SpecifierSet, or a string to parse as one.

        :raises ValueError:
            If both operands carry explicit prerelease overrides that
            disagree.

        >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1'
        <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')>
        >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1')
        <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')>
        """
        if isinstance(other, str):
            other = SpecifierSet(other)
        elif not isinstance(other, SpecifierSet):
            return NotImplemented

        combined = SpecifierSet()
        combined._specs = frozenset(self._specs | other._specs)

        # Merge the explicit overrides: an unset side defers to the other,
        # equal values are kept, and a True/False conflict is an error.
        mine, theirs = self._prereleases, other._prereleases
        if mine is None:
            combined._prereleases = theirs
        elif theirs is None or mine == theirs:
            combined._prereleases = mine
        else:
            raise ValueError(
                "Cannot combine SpecifierSets with True and False prerelease "
                "overrides."
            )

        return combined

    def __eq__(self, other: object) -> bool:
        """Compare this set with another SpecifierSet-like object.

        :param other:
            A SpecifierSet, a :class:`Specifier`, or a string; the latter
            two are converted to a SpecifierSet first.

        Only the specifiers are compared; :attr:`prereleases` is ignored.

        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1")
        True
        >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) ==
        ...  SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True))
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1"
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0")
        False
        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2")
        False
        """
        if isinstance(other, (str, Specifier)):
            candidate = SpecifierSet(str(other))
        elif isinstance(other, SpecifierSet):
            candidate = other
        else:
            return NotImplemented

        return self._specs == candidate._specs

    def __len__(self) -> int:
        """Return how many specifiers this set holds."""
        return len(self._specs)

    def __iter__(self) -> Iterator[Specifier]:
        """Iterate over the :class:`Specifier` instances in this set.

        >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str)
        [<Specifier('!=1.0.1')>, <Specifier('>=1.0.0')>]
        """
        return iter(self._specs)

    def __contains__(self, item: UnparsedVersion) -> bool:
        """Implement ``item in specifier_set``.

        :param item: The version to look for.

        Equivalent to calling :meth:`contains` with only ``item``.

        >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1")
        True
        >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1")
        True
        >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1")
        False
        >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1")
        False
        >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)
        True
        """
        return self.contains(item)

    def contains(
        self,
        item: UnparsedVersion,
        prereleases: Optional[bool] = None,
        installed: Optional[bool] = None,
    ) -> bool:
        """Return whether *item* satisfies every specifier in this set.

        :param item:
            A version string or a :class:`Version` instance.
        :param prereleases:
            Whether prereleases may match. ``None`` (the default) falls
            back to :attr:`prereleases`.
        :param installed:
            When true and *item* is a prerelease, the specifiers are
            checked against the item's base version instead.

        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3")
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3"))
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1")
        False
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1")
        False
        >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1")
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True)
        True
        """
        # Work with a Version instance from here on.
        version = item if isinstance(item, Version) else Version(item)

        # A per-call policy overrides the set's own policy.
        allow_prereleases = self.prereleases if prereleases is None else prereleases

        if version.is_prerelease:
            # Prereleases are rejected outright when they are not allowed.
            if not allow_prereleases:
                return False
            if installed:
                version = Version(version.base_version)

        # ``all`` over an empty set is True: an empty SpecifierSet accepts
        # every version that got this far. This is a deliberate design
        # decision.
        return all(
            spec.contains(version, prereleases=allow_prereleases)
            for spec in self._specs
        )

    def filter(
        self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None
    ) -> Iterator[UnparsedVersionVar]:
        """Return an iterator over the items matching every specifier.

        :param iterable:
            Version strings and/or :class:`Version` instances. Matching
            items are returned unchanged.
        :param prereleases:
            Whether prereleases may be returned. ``None`` (the default)
            falls back to :attr:`prereleases`.

        When the set has specifiers, the resolved policy is handed to each
        specifier's ``filter`` as an explicit boolean. When the set is
        empty and no policy is set anywhere, prereleases are returned only
        if the input contains no final release.

        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"]))
        ['1.3']
        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")]))
        ['1.3', <Version('1.4')>]
        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"]))
        []
        >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True))
        ['1.3', '1.5a1']
        >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"]))
        ['1.3', '1.5a1']

        An "empty" SpecifierSet filters purely on whether items are
        prereleases.

        >>> list(SpecifierSet("").filter(["1.3", "1.5a1"]))
        ['1.3']
        >>> list(SpecifierSet("").filter(["1.5a1"]))
        ['1.5a1']
        >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"]))
        ['1.3', '1.5a1']
        >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True))
        ['1.3', '1.5a1']
        """
        # A per-call policy overrides the set's own policy.
        allow_prereleases = self.prereleases if prereleases is None else prereleases

        # With specifiers present, chain their filters; passing the items
        # through each one in turn acts as a logical AND.
        if self._specs:
            for spec in self._specs:
                iterable = spec.filter(iterable, prereleases=bool(allow_prereleases))
            return iter(iterable)

        # With no specifiers, only a rough filter applies: prereleases are
        # dropped unless they are allowed or no final release exists.
        accepted: List[UnparsedVersionVar] = []
        held_prereleases: List[UnparsedVersionVar] = []

        for item in iterable:
            parsed = _coerce_version(item)

            if not parsed.is_prerelease or allow_prereleases:
                accepted.append(item)
            elif not accepted:
                # Keep unwanted prereleases only while nothing has been
                # accepted yet; afterwards they can no longer be used.
                held_prereleases.append(item)

        # Only prereleases were seen and no policy was given: use them.
        if not accepted and held_prereleases and allow_prereleases is None:
            return iter(held_prereleases)

        return iter(accepted)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/tags.py
ADDED
|
@@ -0,0 +1,571 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
| 2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
| 3 |
+
# for complete details.
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import platform
|
| 7 |
+
import re
|
| 8 |
+
import struct
|
| 9 |
+
import subprocess
|
| 10 |
+
import sys
|
| 11 |
+
import sysconfig
|
| 12 |
+
from importlib.machinery import EXTENSION_SUFFIXES
|
| 13 |
+
from typing import (
|
| 14 |
+
Dict,
|
| 15 |
+
FrozenSet,
|
| 16 |
+
Iterable,
|
| 17 |
+
Iterator,
|
| 18 |
+
List,
|
| 19 |
+
Optional,
|
| 20 |
+
Sequence,
|
| 21 |
+
Tuple,
|
| 22 |
+
Union,
|
| 23 |
+
cast,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
from . import _manylinux, _musllinux
|
| 27 |
+
|
| 28 |
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type aliases used in the signatures below.
PythonVersion = Sequence[int]
MacVersion = Tuple[int, int]

# Maps an interpreter name to its two-letter abbreviation.
INTERPRETER_SHORT_NAMES: Dict[str, str] = dict(
    python="py",  # Generic.
    cpython="cp",
    pypy="pp",
    ironpython="ip",
    jython="jy",
)


# True when a C pointer ("P") is 4 bytes, i.e. 32 bits wide.
_32_BIT_INTERPRETER = struct.calcsize("P") * 8 == 32
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class Tag:
    """
    The (interpreter, abi, platform) triple that tags a wheel.

    The three parts are lower-cased on construction. Instances are treated
    as immutable, so they are hashable and can be compared for equality.
    """

    __slots__ = ["_interpreter", "_abi", "_platform", "_hash"]

    def __init__(self, interpreter: str, abi: str, platform: str) -> None:
        triple = (interpreter.lower(), abi.lower(), platform.lower())
        self._interpreter, self._abi, self._platform = triple
        # Sets of tags hash every element again and again (for example in
        # `isdisjoint()` checks, which may run hundreds of times while a
        # page of links is scanned for matching packages). Computing the
        # hash once here gives downstream consumers a significant speedup.
        self._hash = hash(triple)

    @property
    def interpreter(self) -> str:
        return self._interpreter

    @property
    def abi(self) -> str:
        return self._abi

    @property
    def platform(self) -> str:
        return self._platform

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Tag):
            return NotImplemented

        # Differing hashes settle the question cheaply.
        if self._hash != other._hash:
            return False

        mine = (self._platform, self._abi, self._interpreter)
        theirs = (other._platform, other._abi, other._interpreter)
        return mine == theirs

    def __hash__(self) -> int:
        return self._hash

    def __str__(self) -> str:
        return "-".join((self._interpreter, self._abi, self._platform))

    def __repr__(self) -> str:
        return "<{} @ {}>".format(self, id(self))
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def parse_tag(tag: str) -> FrozenSet[Tag]:
    """
    Expand a tag string (e.g. `py3-none-any`) into a frozenset of Tag instances.

    Each of the three dash-separated parts may list several dot-separated
    values (a compressed tag set), so one string can stand for many tags;
    every combination is returned.
    """
    interpreters, abis, platforms = tag.split("-")
    return frozenset(
        Tag(interpreter, abi, platform_)
        for interpreter in interpreters.split(".")
        for abi in abis.split(".")
        for platform_ in platforms.split(".")
    )
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
|
| 116 |
+
value: Union[int, str, None] = sysconfig.get_config_var(name)
|
| 117 |
+
if value is None and warn:
|
| 118 |
+
logger.debug(
|
| 119 |
+
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
|
| 120 |
+
)
|
| 121 |
+
return value
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _normalize_string(string: str) -> str:
|
| 125 |
+
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _is_threaded_cpython(abis: List[str]) -> bool:
|
| 129 |
+
"""
|
| 130 |
+
Determine if the ABI corresponds to a threaded (`--disable-gil`) build.
|
| 131 |
+
|
| 132 |
+
The threaded builds are indicated by a "t" in the abiflags.
|
| 133 |
+
"""
|
| 134 |
+
if len(abis) == 0:
|
| 135 |
+
return False
|
| 136 |
+
# expect e.g., cp313
|
| 137 |
+
m = re.match(r"cp\d+(.*)", abis[0])
|
| 138 |
+
if not m:
|
| 139 |
+
return False
|
| 140 |
+
abiflags = m.group(1)
|
| 141 |
+
return "t" in abiflags
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool:
|
| 145 |
+
"""
|
| 146 |
+
Determine if the Python version supports abi3.
|
| 147 |
+
|
| 148 |
+
PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`)
|
| 149 |
+
builds do not support abi3.
|
| 150 |
+
"""
|
| 151 |
+
return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]:
    """Return the CPython ABI tags for *py_version*, most specific first.

    The first entry is ``cp<version>`` followed by whichever flags apply:
    ``t`` (GIL disabled, 3.13+), ``d`` (debug build), ``m`` (pymalloc,
    before 3.8) and ``u`` (UCS-4, before 3.3). For a debug build on 3.8 or
    later, the same tag without the debug flag is listed second.

    *warn* is passed on to :func:`_get_config_var` for each lookup.
    """
    version_info = tuple(py_version)  # To allow for version comparison.
    nodot = _version_nodot(version_info[:2])
    threading = debug = pymalloc = ucs4 = ""

    with_debug = _get_config_var("Py_DEBUG", warn)
    has_refcount = hasattr(sys, "gettotalrefcount")
    # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled
    # extension modules is the best option.
    # https://github.com/pypa/pip/issues/3383#issuecomment-173267692
    has_ext = "_d.pyd" in EXTENSION_SUFFIXES
    if with_debug or (with_debug is None and (has_refcount or has_ext)):
        debug = "d"

    if version_info >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn):
        threading = "t"

    fallback_abis: List[str] = []
    if version_info < (3, 8):
        with_pymalloc = _get_config_var("WITH_PYMALLOC", warn)
        if with_pymalloc or with_pymalloc is None:
            pymalloc = "m"
        if version_info < (3, 3):
            unicode_size = _get_config_var("Py_UNICODE_SIZE", warn)
            if unicode_size == 4 or (
                unicode_size is None and sys.maxunicode == 0x10FFFF
            ):
                ucs4 = "u"
    elif debug:
        # Debug builds can also load "normal" extension modules.
        # We can also assume no UCS-4 or pymalloc requirement.
        fallback_abis.append(f"cp{nodot}{threading}")

    return [f"cp{nodot}{threading}{debug}{pymalloc}{ucs4}", *fallback_abis]
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def cpython_tags(
    python_version: Optional[PythonVersion] = None,
    abis: Optional[Iterable[str]] = None,
    platforms: Optional[Iterable[str]] = None,
    *,
    warn: bool = False,
) -> Iterator[Tag]:
    """
    Yield the tags supported by a CPython interpreter, in this order:

    - cp<python_version>-<abi>-<platform>
    - cp<python_version>-abi3-<platform>
    - cp<python_version>-none-<platform>
    - cp<less than python_version>-abi3-<platform>  # Older Python versions down to 3.2.

    When python_version carries only a major version, just the ABIs supplied
    by the caller plus the 'none' ABI tag are used.

    'abi3' and 'none' given in 'abis' are not yielded up front; they appear
    at their normal position in the sequence above.
    """
    py_ver = python_version or sys.version_info[:2]
    interpreter = f"cp{_version_nodot(py_ver[:2])}"

    if abis is None:
        abis = _cpython_abis(py_ver, warn) if len(py_ver) > 1 else []
    abi_list = list(abis)
    # 'abi3' and 'none' are explicitly handled later; drop one occurrence of
    # each so they are not emitted together with the specific ABIs.
    for explicit_abi in ("abi3", "none"):
        if explicit_abi in abi_list:
            abi_list.remove(explicit_abi)

    platform_list = list(platforms or platform_tags())
    yield from (
        Tag(interpreter, abi, platform_)
        for abi in abi_list
        for platform_ in platform_list
    )

    use_abi3 = _abi3_applies(py_ver, _is_threaded_cpython(abi_list))
    if use_abi3:
        for platform_ in platform_list:
            yield Tag(interpreter, "abi3", platform_)
    for platform_ in platform_list:
        yield Tag(interpreter, "none", platform_)

    if not use_abi3:
        return
    # Walk older minor versions down to (and including) x.2.
    for minor_version in range(py_ver[1] - 1, 1, -1):
        older_interpreter = f"cp{_version_nodot((py_ver[0], minor_version))}"
        for platform_ in platform_list:
            yield Tag(older_interpreter, "abi3", platform_)
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def _generic_abi() -> List[str]:
    """
    Return the ABI tag based on EXT_SUFFIX.

    Returns a one-element list holding the normalized ABI, the result of
    ``_cpython_abis()`` when EXT_SUFFIX has fewer than three dot-separated
    parts, or an empty list when the ABI component of the suffix is empty.

    Raises SystemError when EXT_SUFFIX is not a string starting with ".".
    """
    # The following are examples of `EXT_SUFFIX`.
    # We want to keep the parts which are related to the ABI and remove the
    # parts which are related to the platform:
    # - linux:   '.cpython-310-x86_64-linux-gnu.so' => cp310
    # - mac:     '.cpython-310-darwin.so'           => cp310
    # - win:     '.cp310-win_amd64.pyd'             => cp310
    # - win:     '.pyd'                             => cp37 (uses _cpython_abis())
    # - pypy:    '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73
    # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib'
    #                                               => graalpy_38_native

    ext_suffix = _get_config_var("EXT_SUFFIX", warn=True)
    # startswith() rather than indexing [0]: an empty string must produce the
    # SystemError below, not an IndexError.
    if not isinstance(ext_suffix, str) or not ext_suffix.startswith("."):
        raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')")
    parts = ext_suffix.split(".")
    if len(parts) < 3:
        # CPython3.7 and earlier uses ".pyd" on Windows.
        return _cpython_abis(sys.version_info[:2])
    soabi = parts[1]
    if soabi.startswith("cpython"):
        # non-windows
        abi = "cp" + soabi.split("-")[1]
    elif soabi.startswith("cp"):
        # windows
        abi = soabi.split("-")[0]
    elif soabi.startswith("pypy"):
        abi = "-".join(soabi.split("-")[:2])
    elif soabi.startswith("graalpy"):
        abi = "-".join(soabi.split("-")[:3])
    elif soabi:
        # pyston, ironpython, others?
        abi = soabi
    else:
        return []
    return [_normalize_string(abi)]
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def generic_tags(
    interpreter: Optional[str] = None,
    abis: Optional[Iterable[str]] = None,
    platforms: Optional[Iterable[str]] = None,
    *,
    warn: bool = False,
) -> Iterator[Tag]:
    """
    Yield <interpreter>-<abi>-<platform> tags for a generic interpreter.

    Missing arguments are filled in from the running interpreter.  The
    "none" ABI is appended whenever the ABI list does not already have it.
    """
    if not interpreter:
        interpreter = "".join([interpreter_name(), interpreter_version(warn=warn)])
    abi_list = _generic_abi() if abis is None else list(abis)
    platform_list = list(platforms or platform_tags())
    if "none" not in abi_list:
        abi_list.append("none")
    yield from (
        Tag(interpreter, abi, platform_)
        for abi in abi_list
        for platform_ in platform_list
    )
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]:
    """
    Yield "py" interpreter tags from newest to oldest.

    The sequence is: the full major+minor tag (when a minor version is
    given), the major-only tag, then every earlier minor version of that
    major version down to minor 0.
    """
    has_minor = len(py_version) > 1
    if has_minor:
        yield f"py{_version_nodot(py_version[:2])}"
    major = py_version[0]
    yield f"py{major}"
    if has_minor:
        for minor in reversed(range(py_version[1])):
            yield f"py{_version_nodot((major, minor))}"
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def compatible_tags(
    python_version: Optional[PythonVersion] = None,
    interpreter: Optional[str] = None,
    platforms: Optional[Iterable[str]] = None,
) -> Iterator[Tag]:
    """
    Yield the tags compatible with a specific version of Python.

    In order, the tags are:
    - py*-none-<platform>
    - <interpreter>-none-any  # ... if `interpreter` is provided.
    - py*-none-any
    """
    py_ver = python_version or sys.version_info[:2]
    platform_list = list(platforms or platform_tags())

    yield from (
        Tag(py_tag, "none", platform_)
        for py_tag in _py_interpreter_range(py_ver)
        for platform_ in platform_list
    )
    if interpreter:
        yield Tag(interpreter, "none", "any")
    yield from (Tag(py_tag, "none", "any") for py_tag in _py_interpreter_range(py_ver))
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str:
    """
    Map ``arch`` to the architecture name used for a 32-bit interpreter.

    For a non-32-bit interpreter ``arch`` is returned unchanged; otherwise
    any "ppc*" architecture becomes "ppc" and everything else "i386".
    """
    if is_32bit:
        return "ppc" if arch.startswith("ppc") else "i386"
    return arch
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]:
|
| 370 |
+
formats = [cpu_arch]
|
| 371 |
+
if cpu_arch == "x86_64":
|
| 372 |
+
if version < (10, 4):
|
| 373 |
+
return []
|
| 374 |
+
formats.extend(["intel", "fat64", "fat32"])
|
| 375 |
+
|
| 376 |
+
elif cpu_arch == "i386":
|
| 377 |
+
if version < (10, 4):
|
| 378 |
+
return []
|
| 379 |
+
formats.extend(["intel", "fat32", "fat"])
|
| 380 |
+
|
| 381 |
+
elif cpu_arch == "ppc64":
|
| 382 |
+
# TODO: Need to care about 32-bit PPC for ppc64 through 10.2?
|
| 383 |
+
if version > (10, 5) or version < (10, 4):
|
| 384 |
+
return []
|
| 385 |
+
formats.append("fat64")
|
| 386 |
+
|
| 387 |
+
elif cpu_arch == "ppc":
|
| 388 |
+
if version > (10, 6):
|
| 389 |
+
return []
|
| 390 |
+
formats.extend(["fat32", "fat"])
|
| 391 |
+
|
| 392 |
+
if cpu_arch in {"arm64", "x86_64"}:
|
| 393 |
+
formats.append("universal2")
|
| 394 |
+
|
| 395 |
+
if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}:
|
| 396 |
+
formats.append("universal")
|
| 397 |
+
|
| 398 |
+
return formats
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def mac_platforms(
    version: Optional[MacVersion] = None, arch: Optional[str] = None
) -> Iterator[str]:
    """
    Yield the platform tags for a macOS system.

    `version` is a two-item tuple giving the macOS version to generate tags
    for and `arch` is the CPU architecture to generate tags for.  Whichever
    of them is omitted is taken from `platform.mac_ver()`.
    """
    detected_version, _, detected_arch = platform.mac_ver()

    if version is None:
        version = cast("MacVersion", tuple(map(int, detected_version.split(".")[:2])))
        if version == (10, 16):
            # When built against an older macOS SDK, Python will report macOS
            # 10.16 instead of the real version.  Ask a child interpreter
            # running with SYSTEM_VERSION_COMPAT=0 for the version instead.
            probe = subprocess.run(
                [
                    sys.executable,
                    "-sS",
                    "-c",
                    "import platform; print(platform.mac_ver()[0])",
                ],
                check=True,
                env={"SYSTEM_VERSION_COMPAT": "0"},
                stdout=subprocess.PIPE,
                text=True,
            )
            version = cast("MacVersion", tuple(map(int, probe.stdout.split(".")[:2])))
    if arch is None:
        arch = _mac_arch(detected_arch)

    if version >= (11, 0):
        # Starting with Mac OS 11, each yearly release bumps the major version
        # number. The minor versions are now the midyear updates.
        for major in range(version[0], 10, -1):
            for fmt in _mac_binary_formats((major, 0), arch):
                yield f"macosx_{major}_0_{fmt}"

        # Mac OS 11 on x86_64 is compatible with binaries from previous
        # releases.  Arm64 support was introduced in 11.0, so no Arm binaries
        # from previous releases exist.
        #
        # However, the "universal2" binary format can have a macOS version
        # earlier than 11.0 when the x86_64 part of the binary supports that
        # version of macOS.
        for minor in range(16, 3, -1):
            if arch == "x86_64":
                formats = _mac_binary_formats((10, minor), arch)
            else:
                formats = ["universal2"]
            for fmt in formats:
                yield f"macosx_10_{minor}_{fmt}"
    elif version >= (10, 0):
        # Prior to Mac OS 11, each yearly release of Mac OS bumped the
        # "minor" version number. The major version was always 10.
        for minor in range(version[1], -1, -1):
            for fmt in _mac_binary_formats((10, minor), arch):
                yield f"macosx_10_{minor}_{fmt}"
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]:
    """
    Yield the platform tags for a Linux system.

    manylinux tags come first, then musllinux tags, then the plain
    ``linux_<arch>`` tags.  If the normalized sysconfig platform does not
    start with "linux_", it is yielded as the only tag.
    """
    platform_name = _normalize_string(sysconfig.get_platform())
    if platform_name.startswith("linux_"):
        if is_32bit:
            # A 32-bit interpreter on a 64-bit kernel gets the 32-bit
            # platform name instead.
            downgraded = {
                "linux_x86_64": "linux_i686",
                "linux_aarch64": "linux_armv8l",
            }
            platform_name = downgraded.get(platform_name, platform_name)
        arch_name = platform_name.partition("_")[2]
        archs = ["armv8l", "armv7l"] if arch_name == "armv8l" else [arch_name]
        yield from _manylinux.platform_tags(archs)
        yield from _musllinux.platform_tags(archs)
        yield from (f"linux_{arch}" for arch in archs)
    else:
        # we should never be here, just yield the sysconfig one and return
        yield platform_name
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
def _generic_platforms() -> Iterator[str]:
    """Yield the normalized sysconfig platform name as the only platform tag."""
    platform_name = sysconfig.get_platform()
    yield _normalize_string(platform_name)
|
| 510 |
+
|
| 511 |
+
|
| 512 |
+
def platform_tags() -> Iterator[str]:
    """
    Return an iterator over the platform tags for this installation.

    Dispatches on `platform.system()`: macOS and Linux have dedicated tag
    generators, every other system uses the generic one.
    """
    system = platform.system()
    if system == "Darwin":
        return mac_platforms()
    if system == "Linux":
        return _linux_platforms()
    return _generic_platforms()
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
def interpreter_name() -> str:
    """
    Return the name of the running interpreter.

    If the implementation has an entry in INTERPRETER_SHORT_NAMES, that
    abbreviation is returned; otherwise the implementation name itself.
    """
    implementation = sys.implementation.name
    short_name = INTERPRETER_SHORT_NAMES.get(implementation)
    return short_name if short_name else implementation
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
def interpreter_version(*, warn: bool = False) -> str:
    """
    Return the version of the running interpreter, without dots.

    The "py_version_nodot" config variable is used when it is set; otherwise
    the value is built from the major and minor parts of `sys.version_info`.
    """
    configured = _get_config_var("py_version_nodot", warn=warn)
    if not configured:
        return _version_nodot(sys.version_info[:2])
    return str(configured)
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
def _version_nodot(version: PythonVersion) -> str:
|
| 548 |
+
return "".join(map(str, version))
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def sys_tags(*, warn: bool = False) -> Iterator[Tag]:
    """
    Returns the sequence of tag triples for the running interpreter.

    The order of the sequence corresponds to priority order for the
    interpreter, from most to least important.

    `warn` is forwarded to every tag generator that accepts it, for CPython
    and for other interpreters alike.
    """

    interp_name = interpreter_name()
    if interp_name == "cp":
        yield from cpython_tags(warn=warn)
    else:
        # Forward `warn` here too, so the flag is honoured for every
        # interpreter rather than only for CPython.
        yield from generic_tags(warn=warn)

    # Interpreter used for the extra "<interpreter>-none-any" tag, if any.
    if interp_name == "pp":
        interp = "pp3"
    elif interp_name == "cp":
        interp = "cp" + interpreter_version(warn=warn)
    else:
        interp = None
    yield from compatible_tags(interpreter=interp)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/wheel/vendored/packaging/utils.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
| 2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
| 3 |
+
# for complete details.
|
| 4 |
+
|
| 5 |
+
import re
|
| 6 |
+
from typing import FrozenSet, NewType, Tuple, Union, cast
|
| 7 |
+
|
| 8 |
+
from .tags import Tag, parse_tag
|
| 9 |
+
from .version import InvalidVersion, Version
|
| 10 |
+
|
| 11 |
+
# A wheel build tag: the empty tuple when the filename has no build tag,
# otherwise a (leading number, remaining text) pair.
BuildTag = Union[Tuple[()], Tuple[int, str]]
# Distinct static type for names that have been through canonicalize_name().
NormalizedName = NewType("NormalizedName", str)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class InvalidName(ValueError):
    """
    Raised for an invalid distribution name.

    Users should refer to the packaging user guide.
    """
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class InvalidWheelFilename(ValueError):
    """
    Raised for an invalid wheel filename.

    Users should refer to PEP 427.
    """
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class InvalidSdistFilename(ValueError):
    """
    Raised for an invalid sdist filename.

    Users should refer to the packaging user guide.
    """
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Core metadata spec for `Name`: either a single ASCII letter or digit, or a
# string that starts and ends with one and has only letters, digits, ".",
# "_" and "-" in between.  Matching is case-insensitive.
_validate_regex = re.compile(
    r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
)
# Any run of "-", "_" and "." characters; canonicalize_name() replaces each
# run with a single "-".
_canonicalize_regex = re.compile(r"[-_.]+")
# Shape of an already-normalized name: lowercase letters, digits and hyphens,
# starting and ending with a letter or digit.  The lookahead rejects any
# interior character that is followed by two consecutive hyphens.
_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$")
# PEP 427: The build number must start with a digit.
# Group 1 captures the leading digits, group 2 whatever follows them.
_build_tag_regex = re.compile(r"(\d+)(.*)")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
    """
    Return ``name`` in normalized form.

    Every run of "-", "_" and "." is replaced by a single "-" and the result
    is lowercased.  With ``validate=True``, InvalidName is raised first if
    ``name`` does not match the validation pattern.
    """
    if validate and _validate_regex.match(name) is None:
        raise InvalidName(f"name is invalid: {name!r}")
    # This is taken from PEP 503.
    collapsed = _canonicalize_regex.sub("-", name)
    return cast(NormalizedName, collapsed.lower())
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def is_normalized_name(name: str) -> bool:
    """Return True if ``name`` matches the normalized-name pattern."""
    matched = _normalized_regex.match(name)
    return matched is not None
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def canonicalize_version(
    version: Union[Version, str], *, strip_trailing_zero: bool = True
) -> str:
    """
    Render ``version`` as a canonical string.

    This is very similar to Version.__str__, except for the release segment:
    trailing ".0" components are stripped from it unless
    ``strip_trailing_zero`` is false.  A string that does not parse as a
    Version is returned unchanged.
    """
    parsed = version
    if isinstance(version, str):
        try:
            parsed = Version(version)
        except InvalidVersion:
            # Legacy versions cannot be normalized
            return version

    epoch = f"{parsed.epoch}!" if parsed.epoch != 0 else ""

    release = ".".join(map(str, parsed.release))
    if strip_trailing_zero:
        # NB: This strips trailing '.0's to normalize
        release = re.sub(r"(\.0)+$", "", release)

    pre = "".join(map(str, parsed.pre)) if parsed.pre is not None else ""
    post = f".post{parsed.post}" if parsed.post is not None else ""
    dev = f".dev{parsed.dev}" if parsed.dev is not None else ""
    local = f"+{parsed.local}" if parsed.local is not None else ""

    return f"{epoch}{release}{pre}{post}{dev}{local}"
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def parse_wheel_filename(
    filename: str,
) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]:
    """
    Split a wheel filename into its components.

    Returns ``(name, version, build, tags)``: the canonicalized project name,
    the parsed version, the build tag (``()`` when the filename has none) and
    the tags parsed from the final part of the filename.

    Raises InvalidWheelFilename when the extension is not ".whl", the number
    of dash-separated parts is wrong, the project name contains characters
    that are not allowed, the version is invalid, or the build tag does not
    start with a digit.
    """
    if not filename.endswith(".whl"):
        raise InvalidWheelFilename(
            f"Invalid wheel filename (extension must be '.whl'): {filename}"
        )

    filename = filename[:-4]
    dashes = filename.count("-")
    if dashes not in (4, 5):
        raise InvalidWheelFilename(
            f"Invalid wheel filename (wrong number of parts): {filename}"
        )

    # Leave the last three dashes unsplit so the whole tag triple stays in
    # the final part.
    parts = filename.split("-", dashes - 2)
    name_part = parts[0]
    # See PEP 427 for the rules on escaping the project name.
    # fullmatch() is used instead of match() with "^...$" because "$" also
    # matches just before a trailing newline, which would let a name ending
    # in "\n" through.
    if "__" in name_part or re.fullmatch(r"[\w\d._]*", name_part, re.UNICODE) is None:
        raise InvalidWheelFilename(f"Invalid project name: {filename}")
    name = canonicalize_name(name_part)

    try:
        version = Version(parts[1])
    except InvalidVersion as e:
        raise InvalidWheelFilename(
            f"Invalid wheel filename (invalid version): {filename}"
        ) from e

    if dashes == 5:
        build_part = parts[2]
        build_match = _build_tag_regex.match(build_part)
        if build_match is None:
            raise InvalidWheelFilename(
                f"Invalid build number: {build_part} in '{filename}'"
            )
        build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2)))
    else:
        build = ()
    tags = parse_tag(parts[-1])
    return (name, version, build, tags)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]:
    """
    Split an sdist filename into ``(name, version)``.

    Raises InvalidSdistFilename when the extension is neither ".tar.gz" nor
    ".zip", when there is no dash separating name from version, or when the
    version is invalid.
    """
    for extension in (".tar.gz", ".zip"):
        if filename.endswith(extension):
            file_stem = filename[: -len(extension)]
            break
    else:
        raise InvalidSdistFilename(
            f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'): {filename}"
        )

    # We are requiring a PEP 440 version, which cannot contain dashes,
    # so we split on the last dash.
    name_part, dash, version_part = file_stem.rpartition("-")
    if dash == "":
        raise InvalidSdistFilename(f"Invalid sdist filename: {filename}")

    name = canonicalize_name(name_part)
    try:
        version = Version(version_part)
    except InvalidVersion as e:
        raise InvalidSdistFilename(
            f"Invalid sdist filename (invalid version): {filename}"
        ) from e

    return (name, version)
|