koichi12 commited on
Commit
55ebfe8
·
verified ·
1 Parent(s): a034abf

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc +0 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc +0 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc +0 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc +0 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc +0 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc +0 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc +0 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc +0 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h +1077 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt +1568 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt +1568 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe +3 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py +4 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc +0 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc +0 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc +0 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc +0 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc +0 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py +519 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py +397 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py +314 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py +137 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py +159 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py +221 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py +0 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc +0 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc +0 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc +0 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py +6 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc +0 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py +1 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc +0 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc +0 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py +1 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc +0 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc +0 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py +411 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py +38 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc +0 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py +19 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc +0 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py +399 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py +132 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py +1096 -0
.gitattributes CHANGED
@@ -65,3 +65,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/_
65
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/model.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
66
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
67
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
65
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/model.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
66
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
67
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
68
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc ADDED
Binary file (615 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc ADDED
Binary file (4.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc ADDED
Binary file (51.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc ADDED
Binary file (13.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc ADDED
Binary file (8.04 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc ADDED
Binary file (30.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc ADDED
Binary file (23.3 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc ADDED
Binary file (5 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc ADDED
Binary file (7.31 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc ADDED
Binary file (10.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/curand/include/curand.h ADDED
@@ -0,0 +1,1077 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /* Copyright 2010-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * The source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * The Licensed Deliverables contained herein are PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and are being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CURAND_H_)
51
+ #define CURAND_H_
52
+
53
+ /**
54
+ * \defgroup HOST Host API
55
+ *
56
+ * @{
57
+ */
58
+ #ifndef __CUDACC_RTC__
59
+ #include <cuda_runtime.h>
60
+ #endif
61
+
62
+ #ifndef CURANDAPI
63
+ #ifdef _WIN32
64
+ #define CURANDAPI __stdcall
65
+ #else
66
+ #define CURANDAPI
67
+ #endif
68
+ #endif
69
+
70
+ #if defined(__cplusplus)
71
+ extern "C" {
72
+ #endif /* __cplusplus */
73
+
74
+ #define CURAND_VER_MAJOR 10
75
+ #define CURAND_VER_MINOR 3
76
+ #define CURAND_VER_PATCH 0
77
+ #define CURAND_VER_BUILD 86
78
+ #define CURAND_VERSION (CURAND_VER_MAJOR * 1000 + \
79
+ CURAND_VER_MINOR * 100 + \
80
+ CURAND_VER_PATCH)
81
+ /* CURAND Host API datatypes */
82
+
83
+ /**
84
+ * @{
85
+ */
86
+
87
+ /**
88
+ * CURAND function call status types
89
+ */
90
+ enum curandStatus {
91
+ CURAND_STATUS_SUCCESS = 0, ///< No errors
92
+ CURAND_STATUS_VERSION_MISMATCH = 100, ///< Header file and linked library version do not match
93
+ CURAND_STATUS_NOT_INITIALIZED = 101, ///< Generator not initialized
94
+ CURAND_STATUS_ALLOCATION_FAILED = 102, ///< Memory allocation failed
95
+ CURAND_STATUS_TYPE_ERROR = 103, ///< Generator is wrong type
96
+ CURAND_STATUS_OUT_OF_RANGE = 104, ///< Argument out of range
97
+ CURAND_STATUS_LENGTH_NOT_MULTIPLE = 105, ///< Length requested is not a multple of dimension
98
+ CURAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106, ///< GPU does not have double precision required by MRG32k3a
99
+ CURAND_STATUS_LAUNCH_FAILURE = 201, ///< Kernel launch failure
100
+ CURAND_STATUS_PREEXISTING_FAILURE = 202, ///< Preexisting failure on library entry
101
+ CURAND_STATUS_INITIALIZATION_FAILED = 203, ///< Initialization of CUDA failed
102
+ CURAND_STATUS_ARCH_MISMATCH = 204, ///< Architecture mismatch, GPU does not support requested feature
103
+ CURAND_STATUS_INTERNAL_ERROR = 999 ///< Internal library error
104
+ };
105
+
106
+ /*
107
+ * CURAND function call status types
108
+ */
109
+ /** \cond UNHIDE_TYPEDEFS */
110
+ typedef enum curandStatus curandStatus_t;
111
+ /** \endcond */
112
+
113
+ /**
114
+ * CURAND generator types
115
+ */
116
+ enum curandRngType {
117
+ CURAND_RNG_TEST = 0,
118
+ CURAND_RNG_PSEUDO_DEFAULT = 100, ///< Default pseudorandom generator
119
+ CURAND_RNG_PSEUDO_XORWOW = 101, ///< XORWOW pseudorandom generator
120
+ CURAND_RNG_PSEUDO_MRG32K3A = 121, ///< MRG32k3a pseudorandom generator
121
+ CURAND_RNG_PSEUDO_MTGP32 = 141, ///< Mersenne Twister MTGP32 pseudorandom generator
122
+ CURAND_RNG_PSEUDO_MT19937 = 142, ///< Mersenne Twister MT19937 pseudorandom generator
123
+ CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161, ///< PHILOX-4x32-10 pseudorandom generator
124
+ CURAND_RNG_QUASI_DEFAULT = 200, ///< Default quasirandom generator
125
+ CURAND_RNG_QUASI_SOBOL32 = 201, ///< Sobol32 quasirandom generator
126
+ CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202, ///< Scrambled Sobol32 quasirandom generator
127
+ CURAND_RNG_QUASI_SOBOL64 = 203, ///< Sobol64 quasirandom generator
128
+ CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204 ///< Scrambled Sobol64 quasirandom generator
129
+ };
130
+
131
+ /*
132
+ * CURAND generator types
133
+ */
134
+ /** \cond UNHIDE_TYPEDEFS */
135
+ typedef enum curandRngType curandRngType_t;
136
+ /** \endcond */
137
+
138
+ /**
139
+ * CURAND ordering of results in memory
140
+ */
141
+ enum curandOrdering {
142
+ CURAND_ORDERING_PSEUDO_BEST = 100, ///< Best ordering for pseudorandom results
143
+ CURAND_ORDERING_PSEUDO_DEFAULT = 101, ///< Specific default thread sequence for pseudorandom results, same as CURAND_ORDERING_PSEUDO_BEST
144
+ CURAND_ORDERING_PSEUDO_SEEDED = 102, ///< Specific seeding pattern for fast lower quality pseudorandom results
145
+ CURAND_ORDERING_PSEUDO_LEGACY = 103, ///< Specific legacy sequence for pseudorandom results, guaranteed to remain the same for all cuRAND release
146
+ CURAND_ORDERING_PSEUDO_DYNAMIC = 104, ///< Specific ordering adjusted to the device it is being executed on, provides the best performance
147
+ CURAND_ORDERING_QUASI_DEFAULT = 201 ///< Specific n-dimensional ordering for quasirandom results
148
+ };
149
+
150
+ /*
151
+ * CURAND ordering of results in memory
152
+ */
153
+ /** \cond UNHIDE_TYPEDEFS */
154
+ typedef enum curandOrdering curandOrdering_t;
155
+ /** \endcond */
156
+
157
+ /**
158
+ * CURAND choice of direction vector set
159
+ */
160
+ enum curandDirectionVectorSet {
161
+ CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
162
+ CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
163
+ CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 103, ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
164
+ CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104 ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
165
+ };
166
+
167
+ /*
168
+ * CURAND choice of direction vector set
169
+ */
170
+ /** \cond UNHIDE_TYPEDEFS */
171
+ typedef enum curandDirectionVectorSet curandDirectionVectorSet_t;
172
+ /** \endcond */
173
+
174
+ /**
175
+ * CURAND array of 32-bit direction vectors
176
+ */
177
+ /** \cond UNHIDE_TYPEDEFS */
178
+ typedef unsigned int curandDirectionVectors32_t[32];
179
+ /** \endcond */
180
+
181
+ /**
182
+ * CURAND array of 64-bit direction vectors
183
+ */
184
+ /** \cond UNHIDE_TYPEDEFS */
185
+ typedef unsigned long long curandDirectionVectors64_t[64];
186
+ /** \endcond **/
187
+
188
+ /**
189
+ * CURAND generator (opaque)
190
+ */
191
+ struct curandGenerator_st;
192
+
193
+ /**
194
+ * CURAND generator
195
+ */
196
+ /** \cond UNHIDE_TYPEDEFS */
197
+ typedef struct curandGenerator_st *curandGenerator_t;
198
+ /** \endcond */
199
+
200
+ /**
201
+ * CURAND distribution
202
+ */
203
+ /** \cond UNHIDE_TYPEDEFS */
204
+ typedef double curandDistribution_st;
205
+ typedef curandDistribution_st *curandDistribution_t;
206
+ typedef struct curandDistributionShift_st *curandDistributionShift_t;
207
+ /** \endcond */
208
+ /**
209
+ * CURAND distribution M2
210
+ */
211
+ /** \cond UNHIDE_TYPEDEFS */
212
+ typedef struct curandDistributionM2Shift_st *curandDistributionM2Shift_t;
213
+ typedef struct curandHistogramM2_st *curandHistogramM2_t;
214
+ typedef unsigned int curandHistogramM2K_st;
215
+ typedef curandHistogramM2K_st *curandHistogramM2K_t;
216
+ typedef curandDistribution_st curandHistogramM2V_st;
217
+ typedef curandHistogramM2V_st *curandHistogramM2V_t;
218
+
219
+ typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t;
220
+ /** \endcond */
221
+
222
+ /*
223
+ * CURAND METHOD
224
+ */
225
+ /** \cond UNHIDE_ENUMS */
226
+ enum curandMethod {
227
+ CURAND_CHOOSE_BEST = 0, // choose best depends on args
228
+ CURAND_ITR = 1,
229
+ CURAND_KNUTH = 2,
230
+ CURAND_HITR = 3,
231
+ CURAND_M1 = 4,
232
+ CURAND_M2 = 5,
233
+ CURAND_BINARY_SEARCH = 6,
234
+ CURAND_DISCRETE_GAUSS = 7,
235
+ CURAND_REJECTION = 8,
236
+ CURAND_DEVICE_API = 9,
237
+ CURAND_FAST_REJECTION = 10,
238
+ CURAND_3RD = 11,
239
+ CURAND_DEFINITION = 12,
240
+ CURAND_POISSON = 13
241
+ };
242
+
243
+ typedef enum curandMethod curandMethod_t;
244
+ /** \endcond */
245
+
246
+
247
+ #ifndef __CUDACC_RTC__
248
+
249
+ /**
250
+ * @}
251
+ */
252
+
253
+ /**
254
+ * \brief Create new random number generator.
255
+ *
256
+ * Creates a new random number generator of type \p rng_type
257
+ * and returns it in \p *generator.
258
+ *
259
+ * Legal values for \p rng_type are:
260
+ * - CURAND_RNG_PSEUDO_DEFAULT
261
+ * - CURAND_RNG_PSEUDO_XORWOW
262
+ * - CURAND_RNG_PSEUDO_MRG32K3A
263
+ * - CURAND_RNG_PSEUDO_MTGP32
264
+ * - CURAND_RNG_PSEUDO_MT19937
265
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
266
+ * - CURAND_RNG_QUASI_DEFAULT
267
+ * - CURAND_RNG_QUASI_SOBOL32
268
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32
269
+ * - CURAND_RNG_QUASI_SOBOL64
270
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
271
+ *
272
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
273
+ * is CURAND_RNG_PSEUDO_XORWOW. \n
274
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
275
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
276
+ *
277
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
278
+ * - \p seed = 0
279
+ * - \p offset = 0
280
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
281
+ *
282
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
283
+ * - \p seed = 0
284
+ * - \p offset = 0
285
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
286
+ *
287
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
288
+ * - \p seed = 0
289
+ * - \p offset = 0
290
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
291
+ *
292
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
293
+ * - \p seed = 0
294
+ * - \p offset = 0
295
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
296
+ *
297
+ * * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
298
+ * - \p seed = 0
299
+ * - \p offset = 0
300
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
301
+ *
302
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
303
+ * - \p dimensions = 1
304
+ * - \p offset = 0
305
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
306
+ *
307
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
308
+ * - \p dimensions = 1
309
+ * - \p offset = 0
310
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
311
+ *
312
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBBLED_SOBOL32 are:
313
+ * - \p dimensions = 1
314
+ * - \p offset = 0
315
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
316
+ *
317
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
318
+ * - \p dimensions = 1
319
+ * - \p offset = 0
320
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
321
+ *
322
+ * \param generator - Pointer to generator
323
+ * \param rng_type - Type of generator to create
324
+ *
325
+ * \return
326
+ * - CURAND_STATUS_ALLOCATION_FAILED, if memory could not be allocated \n
327
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
328
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
329
+ * dynamically linked library version \n
330
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
331
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
332
+ *
333
+ */
334
+ curandStatus_t CURANDAPI
335
+ curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type);
336
+
337
+ /**
338
+ * \brief Create new host CPU random number generator.
339
+ *
340
+ * Creates a new host CPU random number generator of type \p rng_type
341
+ * and returns it in \p *generator.
342
+ *
343
+ * Legal values for \p rng_type are:
344
+ * - CURAND_RNG_PSEUDO_DEFAULT
345
+ * - CURAND_RNG_PSEUDO_XORWOW
346
+ * - CURAND_RNG_PSEUDO_MRG32K3A
347
+ * - CURAND_RNG_PSEUDO_MTGP32
348
+ * - CURAND_RNG_PSEUDO_MT19937
349
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
350
+ * - CURAND_RNG_QUASI_DEFAULT
351
+ * - CURAND_RNG_QUASI_SOBOL32
352
+ *
353
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
354
+ * is CURAND_RNG_PSEUDO_XORWOW. \n
355
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
356
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
357
+ *
358
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
359
+ * - \p seed = 0
360
+ * - \p offset = 0
361
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
362
+ *
363
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
364
+ * - \p seed = 0
365
+ * - \p offset = 0
366
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
367
+ *
368
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
369
+ * - \p seed = 0
370
+ * - \p offset = 0
371
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
372
+ *
373
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
374
+ * - \p seed = 0
375
+ * - \p offset = 0
376
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
377
+ *
378
+ * * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
379
+ * - \p seed = 0
380
+ * - \p offset = 0
381
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
382
+ *
383
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
384
+ * - \p dimensions = 1
385
+ * - \p offset = 0
386
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
387
+ *
388
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
389
+ * - \p dimensions = 1
390
+ * - \p offset = 0
391
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
392
+ *
393
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are:
394
+ * - \p dimensions = 1
395
+ * - \p offset = 0
396
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
397
+ *
398
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
399
+ * - \p dimensions = 1
400
+ * - \p offset = 0
401
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
402
+ *
403
+ * \param generator - Pointer to generator
404
+ * \param rng_type - Type of generator to create
405
+ *
406
+ * \return
407
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
408
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
409
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
410
+ * dynamically linked library version \n
411
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
412
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
413
+ */
414
+ curandStatus_t CURANDAPI
415
+ curandCreateGeneratorHost(curandGenerator_t *generator, curandRngType_t rng_type);
416
+
417
+ /**
418
+ * \brief Destroy an existing generator.
419
+ *
420
+ * Destroy an existing generator and free all memory associated with its state.
421
+ *
422
+ * \param generator - Generator to destroy
423
+ *
424
+ * \return
425
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
426
+ * - CURAND_STATUS_SUCCESS if generator was destroyed successfully \n
427
+ */
428
+ curandStatus_t CURANDAPI
429
+ curandDestroyGenerator(curandGenerator_t generator);
430
+
431
+ /**
432
+ * \brief Return the version number of the library.
433
+ *
434
+ * Return in \p *version the version number of the dynamically linked CURAND
435
+ * library. The format is the same as CUDART_VERSION from the CUDA Runtime.
436
+ * The only supported configuration is CURAND version equal to CUDA Runtime
437
+ * version.
438
+ *
439
+ * \param version - CURAND library version
440
+ *
441
+ * \return
442
+ * - CURAND_STATUS_SUCCESS if the version number was successfully returned \n
443
+ */
444
+ curandStatus_t CURANDAPI
445
+ curandGetVersion(int *version);
446
+
447
+ /**
448
+ * \brief Return the value of the curand property.
449
+ *
450
+ * Return in \p *value the number for the property described by \p type of the
451
+ * dynamically linked CURAND library.
452
+ *
453
+ * \param type - CUDA library property
454
+ * \param value - integer value for the requested property
455
+ *
456
+ * \return
457
+ * - CURAND_STATUS_SUCCESS if the property value was successfully returned \n
458
+ * - CURAND_STATUS_OUT_OF_RANGE if the property type is not recognized \n
459
+ */
460
+ curandStatus_t CURANDAPI
461
+ curandGetProperty(libraryPropertyType type, int *value);
462
+
463
+
464
+ /**
465
+ * \brief Set the current stream for CURAND kernel launches.
466
+ *
467
+ * Set the current stream for CURAND kernel launches. All library functions
468
+ * will use this stream until set again.
469
+ *
470
+ * \param generator - Generator to modify
471
+ * \param stream - Stream to use or ::NULL for null stream
472
+ *
473
+ * \return
474
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
475
+ * - CURAND_STATUS_SUCCESS if stream was set successfully \n
476
+ */
477
+ curandStatus_t CURANDAPI
478
+ curandSetStream(curandGenerator_t generator, cudaStream_t stream);
479
+
480
+ /**
481
+ * \brief Set the seed value of the pseudo-random number generator.
482
+ *
483
+ * Set the seed value of the pseudorandom number generator.
484
+ * All values of seed are valid. Different seeds will produce different sequences.
485
+ * Different seeds will often not be statistically correlated with each other,
486
+ * but some pairs of seed values may generate sequences which are statistically correlated.
487
+ *
488
+ * \param generator - Generator to modify
489
+ * \param seed - Seed value
490
+ *
491
+ * \return
492
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
493
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a pseudorandom number generator \n
494
+ * - CURAND_STATUS_SUCCESS if generator seed was set successfully \n
495
+ */
496
+ curandStatus_t CURANDAPI
497
+ curandSetPseudoRandomGeneratorSeed(curandGenerator_t generator, unsigned long long seed);
498
+
499
+ /**
500
+ * \brief Set the absolute offset of the pseudo or quasirandom number generator.
501
+ *
502
+ * Set the absolute offset of the pseudo or quasirandom number generator.
503
+ *
504
+ * All values of offset are valid. The offset position is absolute, not
505
+ * relative to the current position in the sequence.
506
+ *
507
+ * \param generator - Generator to modify
508
+ * \param offset - Absolute offset position
509
+ *
510
+ * \return
511
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
512
+ * - CURAND_STATUS_SUCCESS if generator offset was set successfully \n
513
+ */
514
+ curandStatus_t CURANDAPI
515
+ curandSetGeneratorOffset(curandGenerator_t generator, unsigned long long offset);
516
+
517
+ /**
518
+ * \brief Set the ordering of results of the pseudo or quasirandom number generator.
519
+ *
520
+ * Set the ordering of results of the pseudo or quasirandom number generator.
521
+ *
522
+ * Legal values of \p order for pseudorandom generators are:
523
+ * - CURAND_ORDERING_PSEUDO_DEFAULT
524
+ * - CURAND_ORDERING_PSEUDO_BEST
525
+ * - CURAND_ORDERING_PSEUDO_SEEDED
526
+ * - CURAND_ORDERING_PSEUDO_LEGACY
527
+ *
528
+ * Legal values of \p order for quasirandom generators are:
529
+ * - CURAND_ORDERING_QUASI_DEFAULT
530
+ *
531
+ * \param generator - Generator to modify
532
+ * \param order - Ordering of results
533
+ *
534
+ * \return
535
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
536
+ * - CURAND_STATUS_OUT_OF_RANGE if the ordering is not valid \n
537
+ * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n
538
+ */
539
+ curandStatus_t CURANDAPI
540
+ curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order);
541
+
542
+ /**
543
+ * \brief Set the number of dimensions.
544
+ *
545
+ * Set the number of dimensions to be generated by the quasirandom number
546
+ * generator.
547
+ *
548
+ * Legal values for \p num_dimensions are 1 to 20000.
549
+ *
550
+ * \param generator - Generator to modify
551
+ * \param num_dimensions - Number of dimensions
552
+ *
553
+ * \return
554
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
555
+ * - CURAND_STATUS_OUT_OF_RANGE if num_dimensions is not valid \n
556
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a quasirandom number generator \n
557
+ * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n
558
+ */
559
+ curandStatus_t CURANDAPI
560
+ curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions);
561
+
562
+ /**
563
+ * \brief Generate 32-bit pseudo or quasirandom numbers.
564
+ *
565
+ * Use \p generator to generate \p num 32-bit results into the device memory at
566
+ * \p outputPtr. The device memory must have been previously allocated and be
567
+ * large enough to hold all the results. Launches are done with the stream
568
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
569
+ *
570
+ * Results are 32-bit values with every bit random.
571
+ *
572
+ * \param generator - Generator to use
573
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
574
+ * Pointer to host memory to store CPU-generated results
575
+ * \param num - Number of random 32-bit values to generate
576
+ *
577
+ * \return
578
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
579
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
580
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
581
+ * a previous kernel launch \n
582
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
583
+ * not a multiple of the quasirandom dimension \n
584
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
585
+ * - CURAND_STATUS_TYPE_ERROR if the generator is a 64 bit quasirandom generator.
586
+ * (use ::curandGenerateLongLong() with 64 bit quasirandom generators)
587
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
588
+ */
589
+ curandStatus_t CURANDAPI
590
+ curandGenerate(curandGenerator_t generator, unsigned int *outputPtr, size_t num);
591
+
592
+ /**
593
+ * \brief Generate 64-bit quasirandom numbers.
594
+ *
595
+ * Use \p generator to generate \p num 64-bit results into the device memory at
596
+ * \p outputPtr. The device memory must have been previously allocated and be
597
+ * large enough to hold all the results. Launches are done with the stream
598
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
599
+ *
600
+ * Results are 64-bit values with every bit random.
601
+ *
602
+ * \param generator - Generator to use
603
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
604
+ * Pointer to host memory to store CPU-generated results
605
+ * \param num - Number of random 64-bit values to generate
606
+ *
607
+ * \return
608
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
609
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
610
+ * a previous kernel launch \n
611
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
612
+ * not a multiple of the quasirandom dimension \n
613
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
614
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a 64 bit quasirandom generator\n
615
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
616
+ */
617
+ curandStatus_t CURANDAPI
618
+ curandGenerateLongLong(curandGenerator_t generator, unsigned long long *outputPtr, size_t num);
619
+
620
+ /**
621
+ * \brief Generate uniformly distributed floats.
622
+ *
623
+ * Use \p generator to generate \p num float results into the device memory at
624
+ * \p outputPtr. The device memory must have been previously allocated and be
625
+ * large enough to hold all the results. Launches are done with the stream
626
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
627
+ *
628
+ * Results are 32-bit floating point values between \p 0.0f and \p 1.0f,
629
+ * excluding \p 0.0f and including \p 1.0f.
630
+ *
631
+ * \param generator - Generator to use
632
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
633
+ * Pointer to host memory to store CPU-generated results
634
+ * \param num - Number of floats to generate
635
+ *
636
+ * \return
637
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
638
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
639
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
640
+ * a previous kernel launch \n
641
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
642
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
643
+ * not a multiple of the quasirandom dimension \n
644
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
645
+ */
646
+ curandStatus_t CURANDAPI
647
+ curandGenerateUniform(curandGenerator_t generator, float *outputPtr, size_t num);
648
+
649
+ /**
650
+ * \brief Generate uniformly distributed doubles.
651
+ *
652
+ * Use \p generator to generate \p num double results into the device memory at
653
+ * \p outputPtr. The device memory must have been previously allocated and be
654
+ * large enough to hold all the results. Launches are done with the stream
655
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
656
+ *
657
+ * Results are 64-bit double precision floating point values between
658
+ * \p 0.0 and \p 1.0, excluding \p 0.0 and including \p 1.0.
659
+ *
660
+ * \param generator - Generator to use
661
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
662
+ * Pointer to host memory to store CPU-generated results
663
+ * \param num - Number of doubles to generate
664
+ *
665
+ * \return
666
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
667
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
668
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
669
+ * a previous kernel launch \n
670
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
671
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
672
+ * not a multiple of the quasirandom dimension \n
673
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
674
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
675
+ */
676
+ curandStatus_t CURANDAPI
677
+ curandGenerateUniformDouble(curandGenerator_t generator, double *outputPtr, size_t num);
678
+
679
+ /**
680
+ * \brief Generate normally distributed doubles.
681
+ *
682
+ * Use \p generator to generate \p n float results into the device memory at
683
+ * \p outputPtr. The device memory must have been previously allocated and be
684
+ * large enough to hold all the results. Launches are done with the stream
685
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
686
+ *
687
+ * Results are 32-bit floating point values with mean \p mean and standard
688
+ * deviation \p stddev.
689
+ *
690
+ * Normally distributed results are generated from pseudorandom generators
691
+ * with a Box-Muller transform, and so require \p n to be even.
692
+ * Quasirandom generators use an inverse cumulative distribution
693
+ * function to preserve dimensionality.
694
+ *
695
+ * There may be slight numerical differences between results generated
696
+ * on the GPU with generators created with ::curandCreateGenerator()
697
+ * and results calculated on the CPU with generators created with
698
+ * ::curandCreateGeneratorHost(). These differences arise because of
699
+ * differences in results for transcendental functions. In addition,
700
+ * future versions of CURAND may use newer versions of the CUDA math
701
+ * library, so different versions of CURAND may give slightly different
702
+ * numerical values.
703
+ *
704
+ * \param generator - Generator to use
705
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
706
+ * Pointer to host memory to store CPU-generated results
707
+ * \param n - Number of floats to generate
708
+ * \param mean - Mean of normal distribution
709
+ * \param stddev - Standard deviation of normal distribution
710
+ *
711
+ * \return
712
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
713
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
714
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
715
+ * a previous kernel launch \n
716
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
717
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
718
+ * not a multiple of the quasirandom dimension, or is not a multiple
719
+ * of two for pseudorandom generators \n
720
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
721
+ */
722
+ curandStatus_t CURANDAPI
723
+ curandGenerateNormal(curandGenerator_t generator, float *outputPtr,
724
+ size_t n, float mean, float stddev);
725
+
726
+ /**
727
+ * \brief Generate normally distributed doubles.
728
+ *
729
+ * Use \p generator to generate \p n double results into the device memory at
730
+ * \p outputPtr. The device memory must have been previously allocated and be
731
+ * large enough to hold all the results. Launches are done with the stream
732
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
733
+ *
734
+ * Results are 64-bit floating point values with mean \p mean and standard
735
+ * deviation \p stddev.
736
+ *
737
+ * Normally distributed results are generated from pseudorandom generators
738
+ * with a Box-Muller transform, and so require \p n to be even.
739
+ * Quasirandom generators use an inverse cumulative distribution
740
+ * function to preserve dimensionality.
741
+ *
742
+ * There may be slight numerical differences between results generated
743
+ * on the GPU with generators created with ::curandCreateGenerator()
744
+ * and results calculated on the CPU with generators created with
745
+ * ::curandCreateGeneratorHost(). These differences arise because of
746
+ * differences in results for transcendental functions. In addition,
747
+ * future versions of CURAND may use newer versions of the CUDA math
748
+ * library, so different versions of CURAND may give slightly different
749
+ * numerical values.
750
+ *
751
+ * \param generator - Generator to use
752
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
753
+ * Pointer to host memory to store CPU-generated results
754
+ * \param n - Number of doubles to generate
755
+ * \param mean - Mean of normal distribution
756
+ * \param stddev - Standard deviation of normal distribution
757
+ *
758
+ * \return
759
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
760
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
761
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
762
+ * a previous kernel launch \n
763
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
764
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
765
+ * not a multiple of the quasirandom dimension, or is not a multiple
766
+ * of two for pseudorandom generators \n
767
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
768
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
769
+ */
770
+ curandStatus_t CURANDAPI
771
+ curandGenerateNormalDouble(curandGenerator_t generator, double *outputPtr,
772
+ size_t n, double mean, double stddev);
773
+
774
+ /**
775
+ * \brief Generate log-normally distributed floats.
776
+ *
777
+ * Use \p generator to generate \p n float results into the device memory at
778
+ * \p outputPtr. The device memory must have been previously allocated and be
779
+ * large enough to hold all the results. Launches are done with the stream
780
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
781
+ *
782
+ * Results are 32-bit floating point values with log-normal distribution based on
783
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
784
+ *
785
+ * Normally distributed results are generated from pseudorandom generators
786
+ * with a Box-Muller transform, and so require \p n to be even.
787
+ * Quasirandom generators use an inverse cumulative distribution
788
+ * function to preserve dimensionality.
789
+ * The normally distributed results are transformed into log-normal distribution.
790
+ *
791
+ * There may be slight numerical differences between results generated
792
+ * on the GPU with generators created with ::curandCreateGenerator()
793
+ * and results calculated on the CPU with generators created with
794
+ * ::curandCreateGeneratorHost(). These differences arise because of
795
+ * differences in results for transcendental functions. In addition,
796
+ * future versions of CURAND may use newer versions of the CUDA math
797
+ * library, so different versions of CURAND may give slightly different
798
+ * numerical values.
799
+ *
800
+ * \param generator - Generator to use
801
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
802
+ * Pointer to host memory to store CPU-generated results
803
+ * \param n - Number of floats to generate
804
+ * \param mean - Mean of associated normal distribution
805
+ * \param stddev - Standard deviation of associated normal distribution
806
+ *
807
+ * \return
808
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
809
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
810
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
811
+ * a previous kernel launch \n
812
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
813
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
814
+ * not a multiple of the quasirandom dimension, or is not a multiple
815
+ * of two for pseudorandom generators \n
816
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
817
+ */
818
+ curandStatus_t CURANDAPI
819
+ curandGenerateLogNormal(curandGenerator_t generator, float *outputPtr,
820
+ size_t n, float mean, float stddev);
821
+
822
+ /**
823
+ * \brief Generate log-normally distributed doubles.
824
+ *
825
+ * Use \p generator to generate \p n double results into the device memory at
826
+ * \p outputPtr. The device memory must have been previously allocated and be
827
+ * large enough to hold all the results. Launches are done with the stream
828
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
829
+ *
830
+ * Results are 64-bit floating point values with log-normal distribution based on
831
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
832
+ *
833
+ * Normally distributed results are generated from pseudorandom generators
834
+ * with a Box-Muller transform, and so require \p n to be even.
835
+ * Quasirandom generators use an inverse cumulative distribution
836
+ * function to preserve dimensionality.
837
+ * The normally distributed results are transformed into log-normal distribution.
838
+ *
839
+ * There may be slight numerical differences between results generated
840
+ * on the GPU with generators created with ::curandCreateGenerator()
841
+ * and results calculated on the CPU with generators created with
842
+ * ::curandCreateGeneratorHost(). These differences arise because of
843
+ * differences in results for transcendental functions. In addition,
844
+ * future versions of CURAND may use newer versions of the CUDA math
845
+ * library, so different versions of CURAND may give slightly different
846
+ * numerical values.
847
+ *
848
+ * \param generator - Generator to use
849
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
850
+ * Pointer to host memory to store CPU-generated results
851
+ * \param n - Number of doubles to generate
852
+ * \param mean - Mean of normal distribution
853
+ * \param stddev - Standard deviation of normal distribution
854
+ *
855
+ * \return
856
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
857
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
858
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
859
+ * a previous kernel launch \n
860
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
861
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
862
+ * not a multiple of the quasirandom dimension, or is not a multiple
863
+ * of two for pseudorandom generators \n
864
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
865
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
866
+ */
867
+ curandStatus_t CURANDAPI
868
+ curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr,
869
+ size_t n, double mean, double stddev);
870
+
871
+ /**
872
+ * \brief Construct the histogram array for a Poisson distribution.
873
+ *
874
+ * Construct the histogram array for the Poisson distribution with lambda \p lambda.
875
+ * For lambda greater than 2000, an approximation with a normal distribution is used.
876
+ *
877
+ * \param lambda - lambda for the Poisson distribution
878
+ *
879
+ *
880
+ * \param discrete_distribution - pointer to the histogram in device memory
881
+ *
882
+ * \return
883
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
884
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
885
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
886
+ * - CURAND_STATUS_NOT_INITIALIZED if the distribution pointer was null \n
887
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
888
+ * a previous kernel launch \n
889
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
890
+ * - CURAND_STATUS_SUCCESS if the histogram was generated successfully \n
891
+ */
892
+
893
+ curandStatus_t CURANDAPI
894
+ curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution);
895
+
896
+
897
+
898
+ /**
899
+ * \brief Destroy the histogram array for a discrete distribution (e.g. Poisson).
900
+ *
901
+ * Destroy the histogram array for a discrete distribution created by curandCreatePoissonDistribution.
902
+ *
903
+ * \param discrete_distribution - pointer to device memory where the histogram is stored
904
+ *
905
+ * \return
906
+ * - CURAND_STATUS_NOT_INITIALIZED if the histogram was never created \n
907
+ * - CURAND_STATUS_SUCCESS if the histogram was destroyed successfully \n
908
+ */
909
+ curandStatus_t CURANDAPI
910
+ curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution);
911
+
912
+
913
+ /**
914
+ * \brief Generate Poisson-distributed unsigned ints.
915
+ *
916
+ * Use \p generator to generate \p n unsigned int results into device memory at
917
+ * \p outputPtr. The device memory must have been previously allocated and must be
918
+ * large enough to hold all the results. Launches are done with the stream
919
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
920
+ *
921
+ * Results are 32-bit unsigned int point values with Poisson distribution, with lambda \p lambda.
922
+ *
923
+ * \param generator - Generator to use
924
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
925
+ * Pointer to host memory to store CPU-generated results
926
+ * \param n - Number of unsigned ints to generate
927
+ * \param lambda - lambda for the Poisson distribution
928
+ *
929
+ * \return
930
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
931
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
932
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
933
+ * a previous kernel launch \n
934
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
935
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
936
+ * not a multiple of the quasirandom dimension\n
937
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU or sm does not support double precision \n
938
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
939
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
940
+ */
941
+
942
+ curandStatus_t CURANDAPI
943
+ curandGeneratePoisson(curandGenerator_t generator, unsigned int *outputPtr,
944
+ size_t n, double lambda);
945
+ // just for internal usage
946
+ curandStatus_t CURANDAPI
947
+ curandGeneratePoissonMethod(curandGenerator_t generator, unsigned int *outputPtr,
948
+ size_t n, double lambda, curandMethod_t method);
949
+
950
+
951
+ curandStatus_t CURANDAPI
952
+ curandGenerateBinomial(curandGenerator_t generator, unsigned int *outputPtr,
953
+ size_t num, unsigned int n, double p);
954
+ // just for internal usage
955
+ curandStatus_t CURANDAPI
956
+ curandGenerateBinomialMethod(curandGenerator_t generator,
957
+ unsigned int *outputPtr,
958
+ size_t num, unsigned int n, double p,
959
+ curandMethod_t method);
960
+
961
+
962
+ /**
963
+ * \brief Setup starting states.
964
+ *
965
+ * Generate the starting state of the generator. This function is
966
+ * automatically called by generation functions such as
967
+ * ::curandGenerate() and ::curandGenerateUniform().
968
+ * It can be called manually for performance testing reasons to separate
969
+ * timings for starting state generation and random number generation.
970
+ *
971
+ * \param generator - Generator to update
972
+ *
973
+ * \return
974
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
975
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
976
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
977
+ * a previous kernel launch \n
978
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
979
+ * - CURAND_STATUS_SUCCESS if the seeds were generated successfully \n
980
+ */
981
+ curandStatus_t CURANDAPI
982
+ curandGenerateSeeds(curandGenerator_t generator);
983
+
984
+ /**
985
+ * \brief Get direction vectors for 32-bit quasirandom number generation.
986
+ *
987
+ * Get a pointer to an array of direction vectors that can be used
988
+ * for quasirandom number generation. The resulting pointer will
989
+ * reference an array of direction vectors in host memory.
990
+ *
991
+ * The array contains vectors for many dimensions. Each dimension
992
+ * has 32 vectors. Each individual vector is an unsigned int.
993
+ *
994
+ * Legal values for \p set are:
995
+ * - CURAND_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
996
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
997
+ *
998
+ * \param vectors - Address of pointer in which to return direction vectors
999
+ * \param set - Which set of direction vectors to use
1000
+ *
1001
+ * \return
1002
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
1003
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1004
+ */
1005
+ curandStatus_t CURANDAPI
1006
+ curandGetDirectionVectors32(curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set);
1007
+
1008
+ /**
1009
+ * \brief Get scramble constants for 32-bit scrambled Sobol' .
1010
+ *
1011
+ * Get a pointer to an array of scramble constants that can be used
1012
+ * for quasirandom number generation. The resulting pointer will
1013
+ * reference an array of unsinged ints in host memory.
1014
+ *
1015
+ * The array contains constants for many dimensions. Each dimension
1016
+ * has a single unsigned int constant.
1017
+ *
1018
+ * \param constants - Address of pointer in which to return scramble constants
1019
+ *
1020
+ * \return
1021
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1022
+ */
1023
+ curandStatus_t CURANDAPI
1024
+ curandGetScrambleConstants32(unsigned int * * constants);
1025
+
1026
+ /**
1027
+ * \brief Get direction vectors for 64-bit quasirandom number generation.
1028
+ *
1029
+ * Get a pointer to an array of direction vectors that can be used
1030
+ * for quasirandom number generation. The resulting pointer will
1031
+ * reference an array of direction vectors in host memory.
1032
+ *
1033
+ * The array contains vectors for many dimensions. Each dimension
1034
+ * has 64 vectors. Each individual vector is an unsigned long long.
1035
+ *
1036
+ * Legal values for \p set are:
1037
+ * - CURAND_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
1038
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
1039
+ *
1040
+ * \param vectors - Address of pointer in which to return direction vectors
1041
+ * \param set - Which set of direction vectors to use
1042
+ *
1043
+ * \return
1044
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
1045
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1046
+ */
1047
+ curandStatus_t CURANDAPI
1048
+ curandGetDirectionVectors64(curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set);
1049
+
1050
+ /**
1051
+ * \brief Get scramble constants for 64-bit scrambled Sobol' .
1052
+ *
1053
+ * Get a pointer to an array of scramble constants that can be used
1054
+ * for quasirandom number generation. The resulting pointer will
1055
+ * reference an array of unsinged long longs in host memory.
1056
+ *
1057
+ * The array contains constants for many dimensions. Each dimension
1058
+ * has a single unsigned long long constant.
1059
+ *
1060
+ * \param constants - Address of pointer in which to return scramble constants
1061
+ *
1062
+ * \return
1063
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1064
+ */
1065
+ curandStatus_t CURANDAPI
1066
+ curandGetScrambleConstants64(unsigned long long * * constants);
1067
+
1068
+ /** @} */
1069
+
1070
+ #endif // __CUDACC_RTC__
1071
+
1072
+ #if defined(__cplusplus)
1073
+ }
1074
+ #endif /* __cplusplus */
1075
+
1076
+
1077
+ #endif /* !defined(CURAND_H_) */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_nvrtc_cu11-11.8.89.dist-info/License.txt ADDED
@@ -0,0 +1,1568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ End User License Agreement
2
+ --------------------------
3
+
4
+
5
+ Preface
6
+ -------
7
+
8
+ The Software License Agreement in Chapter 1 and the Supplement
9
+ in Chapter 2 contain license terms and conditions that govern
10
+ the use of NVIDIA software. By accepting this agreement, you
11
+ agree to comply with all the terms and conditions applicable
12
+ to the product(s) included herein.
13
+
14
+
15
+ NVIDIA Driver
16
+
17
+
18
+ Description
19
+
20
+ This package contains the operating system driver and
21
+ fundamental system software components for NVIDIA GPUs.
22
+
23
+
24
+ NVIDIA CUDA Toolkit
25
+
26
+
27
+ Description
28
+
29
+ The NVIDIA CUDA Toolkit provides command-line and graphical
30
+ tools for building, debugging and optimizing the performance
31
+ of applications accelerated by NVIDIA GPUs, runtime and math
32
+ libraries, and documentation including programming guides,
33
+ user manuals, and API references.
34
+
35
+
36
+ Default Install Location of CUDA Toolkit
37
+
38
+ Windows platform:
39
+
40
+ %ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
41
+
42
+ Linux platform:
43
+
44
+ /usr/local/cuda-#.#
45
+
46
+ Mac platform:
47
+
48
+ /Developer/NVIDIA/CUDA-#.#
49
+
50
+
51
+ NVIDIA CUDA Samples
52
+
53
+
54
+ Description
55
+
56
+ This package includes 100+ CUDA examples that demonstrate
57
+ various CUDA programming principles, and efficient CUDA
58
+ implementation of algorithms in specific application domains.
59
+
60
+
61
+ Default Install Location of CUDA Samples
62
+
63
+ Windows platform:
64
+
65
+ %ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
66
+
67
+ Linux platform:
68
+
69
+ /usr/local/cuda-#.#/samples
70
+
71
+ and
72
+
73
+ $HOME/NVIDIA_CUDA-#.#_Samples
74
+
75
+ Mac platform:
76
+
77
+ /Developer/NVIDIA/CUDA-#.#/samples
78
+
79
+
80
+ NVIDIA Nsight Visual Studio Edition (Windows only)
81
+
82
+
83
+ Description
84
+
85
+ NVIDIA Nsight Development Platform, Visual Studio Edition is a
86
+ development environment integrated into Microsoft Visual
87
+ Studio that provides tools for debugging, profiling, analyzing
88
+ and optimizing your GPU computing and graphics applications.
89
+
90
+
91
+ Default Install Location of Nsight Visual Studio Edition
92
+
93
+ Windows platform:
94
+
95
+ %ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
96
+
97
+
98
+ 1. License Agreement for NVIDIA Software Development Kits
99
+ ---------------------------------------------------------
100
+
101
+
102
+ Release Date: July 26, 2018
103
+ ---------------------------
104
+
105
+
106
+ Important Notice — Read before downloading,
107
+ copying or using the licensed software:
108
+ -------------------------------------------------------
109
+
110
+ This license agreement, including exhibits attached
111
+ ("Agreement") is a legal agreement between you and NVIDIA
112
+ Corporation ("NVIDIA") and governs your use of a NVIDIA
113
+ software development kit (“SDK”).
114
+
115
+ Each SDK has its own set of software and materials, but here
116
+ is a description of the types of items that may be included in
117
+ a SDK: source code, header files, APIs, data sets and assets
118
+ (examples include images, textures, models, scenes, videos,
119
+ native API input/output files), binary software, sample code,
120
+ libraries, utility programs, programming code and
121
+ documentation.
122
+
123
+ This Agreement can be accepted only by an adult of legal age
124
+ of majority in the country in which the SDK is used.
125
+
126
+ If you are entering into this Agreement on behalf of a company
127
+ or other legal entity, you represent that you have the legal
128
+ authority to bind the entity to this Agreement, in which case
129
+ “you” will mean the entity you represent.
130
+
131
+ If you don’t have the required age or authority to accept
132
+ this Agreement, or if you don’t accept all the terms and
133
+ conditions of this Agreement, do not download, install or use
134
+ the SDK.
135
+
136
+ You agree to use the SDK only for purposes that are permitted
137
+ by (a) this Agreement, and (b) any applicable law, regulation
138
+ or generally accepted practices or guidelines in the relevant
139
+ jurisdictions.
140
+
141
+
142
+ 1.1. License
143
+
144
+
145
+ 1.1.1. License Grant
146
+
147
+ Subject to the terms of this Agreement, NVIDIA hereby grants
148
+ you a non-exclusive, non-transferable license, without the
149
+ right to sublicense (except as expressly provided in this
150
+ Agreement) to:
151
+
152
+ 1. Install and use the SDK,
153
+
154
+ 2. Modify and create derivative works of sample source code
155
+ delivered in the SDK, and
156
+
157
+ 3. Distribute those portions of the SDK that are identified
158
+ in this Agreement as distributable, as incorporated in
159
+ object code format into a software application that meets
160
+ the distribution requirements indicated in this Agreement.
161
+
162
+
163
+ 1.1.2. Distribution Requirements
164
+
165
+ These are the distribution requirements for you to exercise
166
+ the distribution grant:
167
+
168
+ 1. Your application must have material additional
169
+ functionality, beyond the included portions of the SDK.
170
+
171
+ 2. The distributable portions of the SDK shall only be
172
+ accessed by your application.
173
+
174
+ 3. The following notice shall be included in modifications
175
+ and derivative works of sample source code distributed:
176
+ “This software contains source code provided by NVIDIA
177
+ Corporation.”
178
+
179
+ 4. Unless a developer tool is identified in this Agreement
180
+ as distributable, it is delivered for your internal use
181
+ only.
182
+
183
+ 5. The terms under which you distribute your application
184
+ must be consistent with the terms of this Agreement,
185
+ including (without limitation) terms relating to the
186
+ license grant and license restrictions and protection of
187
+ NVIDIA’s intellectual property rights. Additionally, you
188
+ agree that you will protect the privacy, security and
189
+ legal rights of your application users.
190
+
191
+ 6. You agree to notify NVIDIA in writing of any known or
192
+ suspected distribution or use of the SDK not in compliance
193
+ with the requirements of this Agreement, and to enforce
194
+ the terms of your agreements with respect to distributed
195
+ SDK.
196
+
197
+
198
+ 1.1.3. Authorized Users
199
+
200
+ You may allow employees and contractors of your entity or of
201
+ your subsidiary(ies) to access and use the SDK from your
202
+ secure network to perform work on your behalf.
203
+
204
+ If you are an academic institution you may allow users
205
+ enrolled or employed by the academic institution to access and
206
+ use the SDK from your secure network.
207
+
208
+ You are responsible for the compliance with the terms of this
209
+ Agreement by your authorized users. If you become aware that
210
+ your authorized users didn’t follow the terms of this
211
+ Agreement, you agree to take reasonable steps to resolve the
212
+ non-compliance and prevent new occurrences.
213
+
214
+
215
+ 1.1.4. Pre-Release SDK
216
+
217
+ The SDK versions identified as alpha, beta, preview or
218
+ otherwise as pre-release, may not be fully functional, may
219
+ contain errors or design flaws, and may have reduced or
220
+ different security, privacy, accessibility, availability, and
221
+ reliability standards relative to commercial versions of
222
+ NVIDIA software and materials. Use of a pre-release SDK may
223
+ result in unexpected results, loss of data, project delays or
224
+ other unpredictable damage or loss.
225
+
226
+ You may use a pre-release SDK at your own risk, understanding
227
+ that pre-release SDKs are not intended for use in production
228
+ or business-critical systems.
229
+
230
+ NVIDIA may choose not to make available a commercial version
231
+ of any pre-release SDK. NVIDIA may also choose to abandon
232
+ development and terminate the availability of a pre-release
233
+ SDK at any time without liability.
234
+
235
+
236
+ 1.1.5. Updates
237
+
238
+ NVIDIA may, at its option, make available patches, workarounds
239
+ or other updates to this SDK. Unless the updates are provided
240
+ with their separate governing terms, they are deemed part of
241
+ the SDK licensed to you as provided in this Agreement. You
242
+ agree that the form and content of the SDK that NVIDIA
243
+ provides may change without prior notice to you. While NVIDIA
244
+ generally maintains compatibility between versions, NVIDIA may
245
+ in some cases make changes that introduce incompatibilities in
246
+ future versions of the SDK.
247
+
248
+
249
+ 1.1.6. Third Party Licenses
250
+
251
+ The SDK may come bundled with, or otherwise include or be
252
+ distributed with, third party software licensed by a NVIDIA
253
+ supplier and/or open source software provided under an open
254
+ source license. Use of third party software is subject to the
255
+ third-party license terms, or in the absence of third party
256
+ terms, the terms of this Agreement. Copyright to third party
257
+ software is held by the copyright holders indicated in the
258
+ third-party software or license.
259
+
260
+
261
+ 1.1.7. Reservation of Rights
262
+
263
+ NVIDIA reserves all rights, title, and interest in and to the
264
+ SDK, not expressly granted to you under this Agreement.
265
+
266
+
267
+ 1.2. Limitations
268
+
269
+ The following license limitations apply to your use of the
270
+ SDK:
271
+
272
+ 1. You may not reverse engineer, decompile or disassemble,
273
+ or remove copyright or other proprietary notices from any
274
+ portion of the SDK or copies of the SDK.
275
+
276
+ 2. Except as expressly provided in this Agreement, you may
277
+ not copy, sell, rent, sublicense, transfer, distribute,
278
+ modify, or create derivative works of any portion of the
279
+ SDK. For clarity, you may not distribute or sublicense the
280
+ SDK as a stand-alone product.
281
+
282
+ 3. Unless you have an agreement with NVIDIA for this
283
+ purpose, you may not indicate that an application created
284
+ with the SDK is sponsored or endorsed by NVIDIA.
285
+
286
+ 4. You may not bypass, disable, or circumvent any
287
+ encryption, security, digital rights management or
288
+ authentication mechanism in the SDK.
289
+
290
+ 5. You may not use the SDK in any manner that would cause it
291
+ to become subject to an open source software license. As
292
+ examples, licenses that require as a condition of use,
293
+ modification, and/or distribution that the SDK be:
294
+
295
+ a. Disclosed or distributed in source code form;
296
+
297
+ b. Licensed for the purpose of making derivative works;
298
+ or
299
+
300
+ c. Redistributable at no charge.
301
+
302
+ 6. Unless you have an agreement with NVIDIA for this
303
+ purpose, you may not use the SDK with any system or
304
+ application where the use or failure of the system or
305
+ application can reasonably be expected to threaten or
306
+ result in personal injury, death, or catastrophic loss.
307
+ Examples include use in avionics, navigation, military,
308
+ medical, life support or other life critical applications.
309
+ NVIDIA does not design, test or manufacture the SDK for
310
+ these critical uses and NVIDIA shall not be liable to you
311
+ or any third party, in whole or in part, for any claims or
312
+ damages arising from such uses.
313
+
314
+ 7. You agree to defend, indemnify and hold harmless NVIDIA
315
+ and its affiliates, and their respective employees,
316
+ contractors, agents, officers and directors, from and
317
+ against any and all claims, damages, obligations, losses,
318
+ liabilities, costs or debt, fines, restitutions and
319
+ expenses (including but not limited to attorney’s fees
320
+ and costs incident to establishing the right of
321
+ indemnification) arising out of or related to your use of
322
+ the SDK outside of the scope of this Agreement, or not in
323
+ compliance with its terms.
324
+
325
+
326
+ 1.3. Ownership
327
+
328
+ 1. NVIDIA or its licensors hold all rights, title and
329
+ interest in and to the SDK and its modifications and
330
+ derivative works, including their respective intellectual
331
+ property rights, subject to your rights described in this
332
+ section. This SDK may include software and materials from
333
+ NVIDIA’s licensors, and these licensors are intended
334
+ third party beneficiaries that may enforce this Agreement
335
+ with respect to their intellectual property rights.
336
+
337
+ 2. You hold all rights, title and interest in and to your
338
+ applications and your derivative works of the sample
339
+ source code delivered in the SDK, including their
340
+ respective intellectual property rights, subject to
341
+ NVIDIA’s rights described in this section.
342
+
343
+ 3. You may, but don’t have to, provide to NVIDIA
344
+ suggestions, feature requests or other feedback regarding
345
+ the SDK, including possible enhancements or modifications
346
+ to the SDK. For any feedback that you voluntarily provide,
347
+ you hereby grant NVIDIA and its affiliates a perpetual,
348
+ non-exclusive, worldwide, irrevocable license to use,
349
+ reproduce, modify, license, sublicense (through multiple
350
+ tiers of sublicensees), and distribute (through multiple
351
+ tiers of distributors) it without the payment of any
352
+ royalties or fees to you. NVIDIA will use feedback at its
353
+ choice. NVIDIA is constantly looking for ways to improve
354
+ its products, so you may send feedback to NVIDIA through
355
+ the developer portal at https://developer.nvidia.com.
356
+
357
+
358
+ 1.4. No Warranties
359
+
360
+ THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
361
+ FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
362
+ ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
363
+ OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
364
+ BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
365
+ FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
366
+ ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
367
+ WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
368
+ DEALING OR COURSE OF TRADE.
369
+
370
+
371
+ 1.5. Limitation of Liability
372
+
373
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
374
+ AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
375
+ PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
376
+ OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
377
+ PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
378
+ WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
379
+ WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
380
+ OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
381
+ PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
382
+ LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
383
+ TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
384
+ AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
385
+ NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
386
+ LIMIT.
387
+
388
+ These exclusions and limitations of liability shall apply
389
+ regardless if NVIDIA or its affiliates have been advised of
390
+ the possibility of such damages, and regardless of whether a
391
+ remedy fails its essential purpose. These exclusions and
392
+ limitations of liability form an essential basis of the
393
+ bargain between the parties, and, absent any of these
394
+ exclusions or limitations of liability, the provisions of this
395
+ Agreement, including, without limitation, the economic terms,
396
+ would be substantially different.
397
+
398
+
399
+ 1.6. Termination
400
+
401
+ 1. This Agreement will continue to apply until terminated by
402
+ either you or NVIDIA as described below.
403
+
404
+ 2. If you want to terminate this Agreement, you may do so by
405
+ stopping to use the SDK.
406
+
407
+ 3. NVIDIA may, at any time, terminate this Agreement if:
408
+
409
+ a. (i) you fail to comply with any term of this
410
+ Agreement and the non-compliance is not fixed within
411
+ thirty (30) days following notice from NVIDIA (or
412
+ immediately if you violate NVIDIA’s intellectual
413
+ property rights);
414
+
415
+ b. (ii) you commence or participate in any legal
416
+ proceeding against NVIDIA with respect to the SDK; or
417
+
418
+ c. (iii) NVIDIA decides to no longer provide the SDK in
419
+ a country or, in NVIDIA’s sole discretion, the
420
+ continued use of it is no longer commercially viable.
421
+
422
+ 4. Upon any termination of this Agreement, you agree to
423
+ promptly discontinue use of the SDK and destroy all copies
424
+ in your possession or control. Your prior distributions in
425
+ accordance with this Agreement are not affected by the
426
+ termination of this Agreement. Upon written request, you
427
+ will certify in writing that you have complied with your
428
+ commitments under this section. Upon any termination of
429
+ this Agreement all provisions survive except for the
430
+ license grant provisions.
431
+
432
+
433
+ 1.7. General
434
+
435
+ If you wish to assign this Agreement or your rights and
436
+ obligations, including by merger, consolidation, dissolution
437
+ or operation of law, contact NVIDIA to ask for permission. Any
438
+ attempted assignment not approved by NVIDIA in writing shall
439
+ be void and of no effect. NVIDIA may assign, delegate or
440
+ transfer this Agreement and its rights and obligations, and if
441
+ to a non-affiliate you will be notified.
442
+
443
+ You agree to cooperate with NVIDIA and provide reasonably
444
+ requested information to verify your compliance with this
445
+ Agreement.
446
+
447
+ This Agreement will be governed in all respects by the laws of
448
+ the United States and of the State of Delaware as those laws
449
+ are applied to contracts entered into and performed entirely
450
+ within Delaware by Delaware residents, without regard to the
451
+ conflicts of laws principles. The United Nations Convention on
452
+ Contracts for the International Sale of Goods is specifically
453
+ disclaimed. You agree to all terms of this Agreement in the
454
+ English language.
455
+
456
+ The state or federal courts residing in Santa Clara County,
457
+ California shall have exclusive jurisdiction over any dispute
458
+ or claim arising out of this Agreement. Notwithstanding this,
459
+ you agree that NVIDIA shall still be allowed to apply for
460
+ injunctive remedies or an equivalent type of urgent legal
461
+ relief in any jurisdiction.
462
+
463
+ If any court of competent jurisdiction determines that any
464
+ provision of this Agreement is illegal, invalid or
465
+ unenforceable, such provision will be construed as limited to
466
+ the extent necessary to be consistent with and fully
467
+ enforceable under the law and the remaining provisions will
468
+ remain in full force and effect. Unless otherwise specified,
469
+ remedies are cumulative.
470
+
471
+ Each party acknowledges and agrees that the other is an
472
+ independent contractor in the performance of this Agreement.
473
+
474
+ The SDK has been developed entirely at private expense and is
475
+ “commercial items” consisting of “commercial computer
476
+ software” and “commercial computer software
477
+ documentation” provided with RESTRICTED RIGHTS. Use,
478
+ duplication or disclosure by the U.S. Government or a U.S.
479
+ Government subcontractor is subject to the restrictions in
480
+ this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
481
+ in subparagraphs (c)(1) and (2) of the Commercial Computer
482
+ Software - Restricted Rights clause at FAR 52.227-19, as
483
+ applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
484
+ Expressway, Santa Clara, CA 95051.
485
+
486
+ The SDK is subject to United States export laws and
487
+ regulations. You agree that you will not ship, transfer or
488
+ export the SDK into any country, or use the SDK in any manner,
489
+ prohibited by the United States Bureau of Industry and
490
+ Security or economic sanctions regulations administered by the
491
+ U.S. Department of Treasury’s Office of Foreign Assets
492
+ Control (OFAC), or any applicable export laws, restrictions or
493
+ regulations. These laws include restrictions on destinations,
494
+ end users and end use. By accepting this Agreement, you
495
+ confirm that you are not a resident or citizen of any country
496
+ currently embargoed by the U.S. and that you are not otherwise
497
+ prohibited from receiving the SDK.
498
+
499
+ Any notice delivered by NVIDIA to you under this Agreement
500
+ will be delivered via mail, email or fax. You agree that any
501
+ notices that NVIDIA sends you electronically will satisfy any
502
+ legal communication requirements. Please direct your legal
503
+ notices or other correspondence to NVIDIA Corporation, 2788
504
+ San Tomas Expressway, Santa Clara, California 95051, United
505
+ States of America, Attention: Legal Department.
506
+
507
+ This Agreement and any exhibits incorporated into this
508
+ Agreement constitute the entire agreement of the parties with
509
+ respect to the subject matter of this Agreement and supersede
510
+ all prior negotiations or documentation exchanged between the
511
+ parties relating to this SDK license. Any additional and/or
512
+ conflicting terms on documents issued by you are null, void,
513
+ and invalid. Any amendment or waiver under this Agreement
514
+ shall be in writing and signed by representatives of both
515
+ parties.
516
+
517
+
518
+ 2. CUDA Toolkit Supplement to Software License Agreement for
519
+ NVIDIA Software Development Kits
520
+ ------------------------------------------------------------
521
+
522
+
523
+ Release date: August 16, 2018
524
+ -----------------------------
525
+
526
+ The terms in this supplement govern your use of the NVIDIA
527
+ CUDA Toolkit SDK under the terms of your license agreement
528
+ (“Agreement”) as modified by this supplement. Capitalized
529
+ terms used but not defined below have the meaning assigned to
530
+ them in the Agreement.
531
+
532
+ This supplement is an exhibit to the Agreement and is
533
+ incorporated as an integral part of the Agreement. In the
534
+ event of conflict between the terms in this supplement and the
535
+ terms in the Agreement, the terms in this supplement govern.
536
+
537
+
538
+ 2.1. License Scope
539
+
540
+ The SDK is licensed for you to develop applications only for
541
+ use in systems with NVIDIA GPUs.
542
+
543
+
544
+ 2.2. Distribution
545
+
546
+ The portions of the SDK that are distributable under the
547
+ Agreement are listed in Attachment A.
548
+
549
+
550
+ 2.3. Operating Systems
551
+
552
+ Those portions of the SDK designed exclusively for use on the
553
+ Linux or FreeBSD operating systems, or other operating systems
554
+ derived from the source code to these operating systems, may
555
+ be copied and redistributed for use in accordance with this
556
+ Agreement, provided that the object code files are not
557
+ modified in any way (except for unzipping of compressed
558
+ files).
559
+
560
+
561
+ 2.4. Audio and Video Encoders and Decoders
562
+
563
+ You acknowledge and agree that it is your sole responsibility
564
+ to obtain any additional third-party licenses required to
565
+ make, have made, use, have used, sell, import, and offer for
566
+ sale your products or services that include or incorporate any
567
+ third-party software and content relating to audio and/or
568
+ video encoders and decoders from, including but not limited
569
+ to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
570
+ MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
571
+ under this Agreement any necessary patent or other rights with
572
+ respect to any audio and/or video encoders and decoders.
573
+
574
+
575
+ 2.5. Licensing
576
+
577
+ If the distribution terms in this Agreement are not suitable
578
+ for your organization, or for any questions regarding this
579
+ Agreement, please contact NVIDIA at
580
+ nvidia-compute-license-questions@nvidia.com.
581
+
582
+
583
+ 2.6. Attachment A
584
+
585
+ The following portions of the SDK are distributable under the
586
+ Agreement:
587
+
588
+ Component
589
+
590
+ CUDA Runtime
591
+
592
+ Windows
593
+
594
+ cudart.dll, cudart_static.lib, cudadevrt.lib
595
+
596
+ Mac OSX
597
+
598
+ libcudart.dylib, libcudart_static.a, libcudadevrt.a
599
+
600
+ Linux
601
+
602
+ libcudart.so, libcudart_static.a, libcudadevrt.a
603
+
604
+ Android
605
+
606
+ libcudart.so, libcudart_static.a, libcudadevrt.a
607
+
608
+ Component
609
+
610
+ CUDA FFT Library
611
+
612
+ Windows
613
+
614
+ cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
615
+
616
+ Mac OSX
617
+
618
+ libcufft.dylib, libcufft_static.a, libcufftw.dylib,
619
+ libcufftw_static.a
620
+
621
+ Linux
622
+
623
+ libcufft.so, libcufft_static.a, libcufftw.so,
624
+ libcufftw_static.a
625
+
626
+ Android
627
+
628
+ libcufft.so, libcufft_static.a, libcufftw.so,
629
+ libcufftw_static.a
630
+
631
+ Component
632
+
633
+ CUDA BLAS Library
634
+
635
+ Windows
636
+
637
+ cublas.dll, cublasLt.dll
638
+
639
+ Mac OSX
640
+
641
+ libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
642
+ libcublasLt_static.a
643
+
644
+ Linux
645
+
646
+ libcublas.so, libcublasLt.so, libcublas_static.a,
647
+ libcublasLt_static.a
648
+
649
+ Android
650
+
651
+ libcublas.so, libcublasLt.so, libcublas_static.a,
652
+ libcublasLt_static.a
653
+
654
+ Component
655
+
656
+ NVIDIA "Drop-in" BLAS Library
657
+
658
+ Windows
659
+
660
+ nvblas.dll
661
+
662
+ Mac OSX
663
+
664
+ libnvblas.dylib
665
+
666
+ Linux
667
+
668
+ libnvblas.so
669
+
670
+ Component
671
+
672
+ CUDA Sparse Matrix Library
673
+
674
+ Windows
675
+
676
+ cusparse.dll, cusparse.lib
677
+
678
+ Mac OSX
679
+
680
+ libcusparse.dylib, libcusparse_static.a
681
+
682
+ Linux
683
+
684
+ libcusparse.so, libcusparse_static.a
685
+
686
+ Android
687
+
688
+ libcusparse.so, libcusparse_static.a
689
+
690
+ Component
691
+
692
+ CUDA Linear Solver Library
693
+
694
+ Windows
695
+
696
+ cusolver.dll, cusolver.lib
697
+
698
+ Mac OSX
699
+
700
+ libcusolver.dylib, libcusolver_static.a
701
+
702
+ Linux
703
+
704
+ libcusolver.so, libcusolver_static.a
705
+
706
+ Android
707
+
708
+ libcusolver.so, libcusolver_static.a
709
+
710
+ Component
711
+
712
+ CUDA Random Number Generation Library
713
+
714
+ Windows
715
+
716
+ curand.dll, curand.lib
717
+
718
+ Mac OSX
719
+
720
+ libcurand.dylib, libcurand_static.a
721
+
722
+ Linux
723
+
724
+ libcurand.so, libcurand_static.a
725
+
726
+ Android
727
+
728
+ libcurand.so, libcurand_static.a
729
+
730
+ Component
731
+
732
+ CUDA Accelerated Graph Library
733
+
734
+ Component
735
+
736
+ NVIDIA Performance Primitives Library
737
+
738
+ Windows
739
+
740
+ nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
741
+ nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
742
+ nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
743
+ nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
744
+ nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
745
+
746
+ Mac OSX
747
+
748
+ libnppc.dylib, libnppc_static.a, libnppial.dylib,
749
+ libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
750
+ libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
751
+ libnppidei_static.a, libnppif.dylib, libnppif_static.a,
752
+ libnppig.dylib, libnppig_static.a, libnppim.dylib,
753
+ libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
754
+ libnpps.dylib, libnpps_static.a
755
+
756
+ Linux
757
+
758
+ libnppc.so, libnppc_static.a, libnppial.so,
759
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
760
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
761
+ libnppidei_static.a, libnppif.so, libnppif_static.a
762
+ libnppig.so, libnppig_static.a, libnppim.so,
763
+ libnppim_static.a, libnppist.so, libnppist_static.a,
764
+ libnppisu.so, libnppisu_static.a, libnppitc.so
765
+ libnppitc_static.a, libnpps.so, libnpps_static.a
766
+
767
+ Android
768
+
769
+ libnppc.so, libnppc_static.a, libnppial.so,
770
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
771
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
772
+ libnppidei_static.a, libnppif.so, libnppif_static.a
773
+ libnppig.so, libnppig_static.a, libnppim.so,
774
+ libnppim_static.a, libnppist.so, libnppist_static.a,
775
+ libnppisu.so, libnppisu_static.a, libnppitc.so
776
+ libnppitc_static.a, libnpps.so, libnpps_static.a
777
+
778
+ Component
779
+
780
+ NVIDIA JPEG Library
781
+
782
+ Linux
783
+
784
+ libnvjpeg.so, libnvjpeg_static.a
785
+
786
+ Component
787
+
788
+ Internal common library required for statically linking to
789
+ cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
790
+
791
+ Mac OSX
792
+
793
+ libculibos.a
794
+
795
+ Linux
796
+
797
+ libculibos.a
798
+
799
+ Component
800
+
801
+ NVIDIA Runtime Compilation Library and Header
802
+
803
+ All
804
+
805
+ nvrtc.h
806
+
807
+ Windows
808
+
809
+ nvrtc.dll, nvrtc-builtins.dll
810
+
811
+ Mac OSX
812
+
813
+ libnvrtc.dylib, libnvrtc-builtins.dylib
814
+
815
+ Linux
816
+
817
+ libnvrtc.so, libnvrtc-builtins.so
818
+
819
+ Component
820
+
821
+ NVIDIA Optimizing Compiler Library
822
+
823
+ Windows
824
+
825
+ nvvm.dll
826
+
827
+ Mac OSX
828
+
829
+ libnvvm.dylib
830
+
831
+ Linux
832
+
833
+ libnvvm.so
834
+
835
+ Component
836
+
837
+ NVIDIA Common Device Math Functions Library
838
+
839
+ Windows
840
+
841
+ libdevice.10.bc
842
+
843
+ Mac OSX
844
+
845
+ libdevice.10.bc
846
+
847
+ Linux
848
+
849
+ libdevice.10.bc
850
+
851
+ Component
852
+
853
+ CUDA Occupancy Calculation Header Library
854
+
855
+ All
856
+
857
+ cuda_occupancy.h
858
+
859
+ Component
860
+
861
+ CUDA Half Precision Headers
862
+
863
+ All
864
+
865
+ cuda_fp16.h, cuda_fp16.hpp
866
+
867
+ Component
868
+
869
+ CUDA Profiling Tools Interface (CUPTI) Library
870
+
871
+ Windows
872
+
873
+ cupti.dll
874
+
875
+ Mac OSX
876
+
877
+ libcupti.dylib
878
+
879
+ Linux
880
+
881
+ libcupti.so
882
+
883
+ Component
884
+
885
+ NVIDIA Tools Extension Library
886
+
887
+ Windows
888
+
889
+ nvToolsExt.dll, nvToolsExt.lib
890
+
891
+ Mac OSX
892
+
893
+ libnvToolsExt.dylib
894
+
895
+ Linux
896
+
897
+ libnvToolsExt.so
898
+
899
+ Component
900
+
901
+ NVIDIA CUDA Driver Libraries
902
+
903
+ Linux
904
+
905
+ libcuda.so, libnvidia-fatbinaryloader.so,
906
+ libnvidia-ptxjitcompiler.so
907
+
908
+ The NVIDIA CUDA Driver Libraries are only distributable in
909
+ applications that meet this criteria:
910
+
911
+ 1. The application was developed starting from a NVIDIA CUDA
912
+ container obtained from Docker Hub or the NVIDIA GPU
913
+ Cloud, and
914
+
915
+ 2. The resulting application is packaged as a Docker
916
+ container and distributed to users on Docker Hub or the
917
+ NVIDIA GPU Cloud only.
918
+
919
+
920
+ 2.7. Attachment B
921
+
922
+
923
+ Additional Licensing Obligations
924
+
925
+ The following third party components included in the SOFTWARE
926
+ are licensed to Licensee pursuant to the following terms and
927
+ conditions:
928
+
929
+ 1. Licensee's use of the GDB third party component is
930
+ subject to the terms and conditions of GNU GPL v3:
931
+
932
+ This product includes copyrighted third-party software licensed
933
+ under the terms of the GNU General Public License v3 ("GPL v3").
934
+ All third-party software packages are copyright by their respective
935
+ authors. GPL v3 terms and conditions are hereby incorporated into
936
+ the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt
937
+
938
+ Consistent with these licensing requirements, the software
939
+ listed below is provided under the terms of the specified
940
+ open source software licenses. To obtain source code for
941
+ software provided under licenses that require
942
+ redistribution of source code, including the GNU General
943
+ Public License (GPL) and GNU Lesser General Public License
944
+ (LGPL), contact oss-requests@nvidia.com. This offer is
945
+ valid for a period of three (3) years from the date of the
946
+ distribution of this product by NVIDIA CORPORATION.
947
+
948
+ Component License
949
+ CUDA-GDB GPL v3
950
+
951
+ 2. Licensee represents and warrants that any and all third
952
+ party licensing and/or royalty payment obligations in
953
+ connection with Licensee's use of the H.264 video codecs
954
+ are solely the responsibility of Licensee.
955
+
956
+ 3. Licensee's use of the Thrust library is subject to the
957
+ terms and conditions of the Apache License Version 2.0.
958
+ All third-party software packages are copyright by their
959
+ respective authors. Apache License Version 2.0 terms and
960
+ conditions are hereby incorporated into the Agreement by
961
+ this reference.
962
+ http://www.apache.org/licenses/LICENSE-2.0.html
963
+
964
+ In addition, Licensee acknowledges the following notice:
965
+ Thrust includes source code from the Boost Iterator,
966
+ Tuple, System, and Random Number libraries.
967
+
968
+ Boost Software License - Version 1.0 - August 17th, 2003
969
+ . . . .
970
+
971
+ Permission is hereby granted, free of charge, to any person or
972
+ organization obtaining a copy of the software and accompanying
973
+ documentation covered by this license (the "Software") to use,
974
+ reproduce, display, distribute, execute, and transmit the Software,
975
+ and to prepare derivative works of the Software, and to permit
976
+ third-parties to whom the Software is furnished to do so, all
977
+ subject to the following:
978
+
979
+ The copyright notices in the Software and this entire statement,
980
+ including the above license grant, this restriction and the following
981
+ disclaimer, must be included in all copies of the Software, in whole
982
+ or in part, and all derivative works of the Software, unless such
983
+ copies or derivative works are solely in the form of machine-executable
984
+ object code generated by a source language processor.
985
+
986
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
987
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
988
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
989
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
990
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
991
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
992
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
993
+ OTHER DEALINGS IN THE SOFTWARE.
994
+
995
+ 4. Licensee's use of the LLVM third party component is
996
+ subject to the following terms and conditions:
997
+
998
+ ======================================================
999
+ LLVM Release License
1000
+ ======================================================
1001
+ University of Illinois/NCSA
1002
+ Open Source License
1003
+
1004
+ Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
1005
+ All rights reserved.
1006
+
1007
+ Developed by:
1008
+
1009
+ LLVM Team
1010
+
1011
+ University of Illinois at Urbana-Champaign
1012
+
1013
+ http://llvm.org
1014
+
1015
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1016
+ of this software and associated documentation files (the "Software"), to
1017
+ deal with the Software without restriction, including without limitation the
1018
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
1019
+ sell copies of the Software, and to permit persons to whom the Software is
1020
+ furnished to do so, subject to the following conditions:
1021
+
1022
+ * Redistributions of source code must retain the above copyright notice,
1023
+ this list of conditions and the following disclaimers.
1024
+
1025
+ * Redistributions in binary form must reproduce the above copyright
1026
+ notice, this list of conditions and the following disclaimers in the
1027
+ documentation and/or other materials provided with the distribution.
1028
+
1029
+ * Neither the names of the LLVM Team, University of Illinois at Urbana-
1030
+ Champaign, nor the names of its contributors may be used to endorse or
1031
+ promote products derived from this Software without specific prior
1032
+ written permission.
1033
+
1034
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1035
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1036
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1037
+ THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
1038
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1039
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1040
+ DEALINGS WITH THE SOFTWARE.
1041
+
1042
+ 5. Licensee's use (e.g. nvprof) of the PCRE third party
1043
+ component is subject to the following terms and
1044
+ conditions:
1045
+
1046
+ ------------
1047
+ PCRE LICENCE
1048
+ ------------
1049
+ PCRE is a library of functions to support regular expressions whose syntax
1050
+ and semantics are as close as possible to those of the Perl 5 language.
1051
+ Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
1052
+ specified below. The documentation for PCRE, supplied in the "doc"
1053
+ directory, is distributed under the same terms as the software itself. The
1054
+ basic library functions are written in C and are freestanding. Also
1055
+ included in the distribution is a set of C++ wrapper functions, and a just-
1056
+ in-time compiler that can be used to optimize pattern matching. These are
1057
+ both optional features that can be omitted when the library is built.
1058
+
1059
+ THE BASIC LIBRARY FUNCTIONS
1060
+ ---------------------------
1061
+ Written by: Philip Hazel
1062
+ Email local part: ph10
1063
+ Email domain: cam.ac.uk
1064
+ University of Cambridge Computing Service,
1065
+ Cambridge, England.
1066
+ Copyright (c) 1997-2012 University of Cambridge
1067
+ All rights reserved.
1068
+
1069
+ PCRE JUST-IN-TIME COMPILATION SUPPORT
1070
+ -------------------------------------
1071
+ Written by: Zoltan Herczeg
1072
+ Email local part: hzmester
1073
+ Emain domain: freemail.hu
1074
+ Copyright(c) 2010-2012 Zoltan Herczeg
1075
+ All rights reserved.
1076
+
1077
+ STACK-LESS JUST-IN-TIME COMPILER
1078
+ --------------------------------
1079
+ Written by: Zoltan Herczeg
1080
+ Email local part: hzmester
1081
+ Emain domain: freemail.hu
1082
+ Copyright(c) 2009-2012 Zoltan Herczeg
1083
+ All rights reserved.
1084
+
1085
+ THE C++ WRAPPER FUNCTIONS
1086
+ -------------------------
1087
+ Contributed by: Google Inc.
1088
+ Copyright (c) 2007-2012, Google Inc.
1089
+ All rights reserved.
1090
+
1091
+ THE "BSD" LICENCE
1092
+ -----------------
1093
+ Redistribution and use in source and binary forms, with or without
1094
+ modification, are permitted provided that the following conditions are met:
1095
+
1096
+ * Redistributions of source code must retain the above copyright notice,
1097
+ this list of conditions and the following disclaimer.
1098
+
1099
+ * Redistributions in binary form must reproduce the above copyright
1100
+ notice, this list of conditions and the following disclaimer in the
1101
+ documentation and/or other materials provided with the distribution.
1102
+
1103
+ * Neither the name of the University of Cambridge nor the name of Google
1104
+ Inc. nor the names of their contributors may be used to endorse or
1105
+ promote products derived from this software without specific prior
1106
+ written permission.
1107
+
1108
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
1109
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1110
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1111
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
1112
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1113
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1114
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1115
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1116
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1117
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1118
+ POSSIBILITY OF SUCH DAMAGE.
1119
+
1120
+ 6. Some of the cuBLAS library routines were written by or
1121
+ derived from code written by Vasily Volkov and are subject
1122
+ to the Modified Berkeley Software Distribution License as
1123
+ follows:
1124
+
1125
+ Copyright (c) 2007-2009, Regents of the University of California
1126
+
1127
+ All rights reserved.
1128
+
1129
+ Redistribution and use in source and binary forms, with or without
1130
+ modification, are permitted provided that the following conditions are
1131
+ met:
1132
+ * Redistributions of source code must retain the above copyright
1133
+ notice, this list of conditions and the following disclaimer.
1134
+ * Redistributions in binary form must reproduce the above
1135
+ copyright notice, this list of conditions and the following
1136
+ disclaimer in the documentation and/or other materials provided
1137
+ with the distribution.
1138
+ * Neither the name of the University of California, Berkeley nor
1139
+ the names of its contributors may be used to endorse or promote
1140
+ products derived from this software without specific prior
1141
+ written permission.
1142
+
1143
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1144
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1145
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1146
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1147
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1148
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1149
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1150
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1151
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1152
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1153
+ POSSIBILITY OF SUCH DAMAGE.
1154
+
1155
+ 7. Some of the cuBLAS library routines were written by or
1156
+ derived from code written by Davide Barbieri and are
1157
+ subject to the Modified Berkeley Software Distribution
1158
+ License as follows:
1159
+
1160
+ Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
1161
+
1162
+ All rights reserved.
1163
+
1164
+ Redistribution and use in source and binary forms, with or without
1165
+ modification, are permitted provided that the following conditions are
1166
+ met:
1167
+ * Redistributions of source code must retain the above copyright
1168
+ notice, this list of conditions and the following disclaimer.
1169
+ * Redistributions in binary form must reproduce the above
1170
+ copyright notice, this list of conditions and the following
1171
+ disclaimer in the documentation and/or other materials provided
1172
+ with the distribution.
1173
+ * The name of the author may not be used to endorse or promote
1174
+ products derived from this software without specific prior
1175
+ written permission.
1176
+
1177
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1178
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1179
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1180
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1181
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1182
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1183
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1184
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1185
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1186
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1187
+ POSSIBILITY OF SUCH DAMAGE.
1188
+
1189
+ 8. Some of the cuBLAS library routines were derived from
1190
+ code developed by the University of Tennessee and are
1191
+ subject to the Modified Berkeley Software Distribution
1192
+ License as follows:
1193
+
1194
+ Copyright (c) 2010 The University of Tennessee.
1195
+
1196
+ All rights reserved.
1197
+
1198
+ Redistribution and use in source and binary forms, with or without
1199
+ modification, are permitted provided that the following conditions are
1200
+ met:
1201
+ * Redistributions of source code must retain the above copyright
1202
+ notice, this list of conditions and the following disclaimer.
1203
+ * Redistributions in binary form must reproduce the above
1204
+ copyright notice, this list of conditions and the following
1205
+ disclaimer listed in this license in the documentation and/or
1206
+ other materials provided with the distribution.
1207
+ * Neither the name of the copyright holders nor the names of its
1208
+ contributors may be used to endorse or promote products derived
1209
+ from this software without specific prior written permission.
1210
+
1211
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1212
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1213
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1214
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1215
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1216
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1217
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1218
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1219
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1220
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1221
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1222
+
1223
+ 9. Some of the cuBLAS library routines were written by or
1224
+ derived from code written by Jonathan Hogg and are subject
1225
+ to the Modified Berkeley Software Distribution License as
1226
+ follows:
1227
+
1228
+ Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
1229
+
1230
+ All rights reserved.
1231
+
1232
+ Redistribution and use in source and binary forms, with or without
1233
+ modification, are permitted provided that the following conditions are
1234
+ met:
1235
+ * Redistributions of source code must retain the above copyright
1236
+ notice, this list of conditions and the following disclaimer.
1237
+ * Redistributions in binary form must reproduce the above
1238
+ copyright notice, this list of conditions and the following
1239
+ disclaimer in the documentation and/or other materials provided
1240
+ with the distribution.
1241
+ * Neither the name of the STFC nor the names of its contributors
1242
+ may be used to endorse or promote products derived from this
1243
+ software without specific prior written permission.
1244
+
1245
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1246
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1247
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1248
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
1249
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1250
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1251
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
1252
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1253
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
1254
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
1255
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1256
+
1257
+ 10. Some of the cuBLAS library routines were written by or
1258
+ derived from code written by Ahmad M. Abdelfattah, David
1259
+ Keyes, and Hatem Ltaief, and are subject to the Apache
1260
+ License, Version 2.0, as follows:
1261
+
1262
+ -- (C) Copyright 2013 King Abdullah University of Science and Technology
1263
+ Authors:
1264
+ Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
1265
+ David Keyes (david.keyes@kaust.edu.sa)
1266
+ Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
1267
+
1268
+ Redistribution and use in source and binary forms, with or without
1269
+ modification, are permitted provided that the following conditions
1270
+ are met:
1271
+
1272
+ * Redistributions of source code must retain the above copyright
1273
+ notice, this list of conditions and the following disclaimer.
1274
+ * Redistributions in binary form must reproduce the above copyright
1275
+ notice, this list of conditions and the following disclaimer in the
1276
+ documentation and/or other materials provided with the distribution.
1277
+ * Neither the name of the King Abdullah University of Science and
1278
+ Technology nor the names of its contributors may be used to endorse
1279
+ or promote products derived from this software without specific prior
1280
+ written permission.
1281
+
1282
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1283
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1284
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1285
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1286
+ HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1287
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1288
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1289
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1290
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1291
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1292
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
1293
+
1294
+ 11. Some of the cuSPARSE library routines were written by or
1295
+ derived from code written by Li-Wen Chang and are subject
1296
+ to the NCSA Open Source License as follows:
1297
+
1298
+ Copyright (c) 2012, University of Illinois.
1299
+
1300
+ All rights reserved.
1301
+
1302
+ Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
1303
+
1304
+ Permission is hereby granted, free of charge, to any person obtaining
1305
+ a copy of this software and associated documentation files (the
1306
+ "Software"), to deal with the Software without restriction, including
1307
+ without limitation the rights to use, copy, modify, merge, publish,
1308
+ distribute, sublicense, and/or sell copies of the Software, and to
1309
+ permit persons to whom the Software is furnished to do so, subject to
1310
+ the following conditions:
1311
+ * Redistributions of source code must retain the above copyright
1312
+ notice, this list of conditions and the following disclaimer.
1313
+ * Redistributions in binary form must reproduce the above
1314
+ copyright notice, this list of conditions and the following
1315
+ disclaimers in the documentation and/or other materials provided
1316
+ with the distribution.
1317
+ * Neither the names of IMPACT Group, University of Illinois, nor
1318
+ the names of its contributors may be used to endorse or promote
1319
+ products derived from this Software without specific prior
1320
+ written permission.
1321
+
1322
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1323
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1324
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1325
+ NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
1326
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1327
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1328
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
1329
+ SOFTWARE.
1330
+
1331
+ 12. Some of the cuRAND library routines were written by or
1332
+ derived from code written by Mutsuo Saito and Makoto
1333
+ Matsumoto and are subject to the following license:
1334
+
1335
+ Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
1336
+ University. All rights reserved.
1337
+
1338
+ Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
1339
+ University and University of Tokyo. All rights reserved.
1340
+
1341
+ Redistribution and use in source and binary forms, with or without
1342
+ modification, are permitted provided that the following conditions are
1343
+ met:
1344
+ * Redistributions of source code must retain the above copyright
1345
+ notice, this list of conditions and the following disclaimer.
1346
+ * Redistributions in binary form must reproduce the above
1347
+ copyright notice, this list of conditions and the following
1348
+ disclaimer in the documentation and/or other materials provided
1349
+ with the distribution.
1350
+ * Neither the name of the Hiroshima University nor the names of
1351
+ its contributors may be used to endorse or promote products
1352
+ derived from this software without specific prior written
1353
+ permission.
1354
+
1355
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1356
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1357
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1358
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1359
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1360
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1361
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1362
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1363
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1364
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1365
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1366
+
1367
+ 13. Some of the cuRAND library routines were derived from
1368
+ code developed by D. E. Shaw Research and are subject to
1369
+ the following license:
1370
+
1371
+ Copyright 2010-2011, D. E. Shaw Research.
1372
+
1373
+ All rights reserved.
1374
+
1375
+ Redistribution and use in source and binary forms, with or without
1376
+ modification, are permitted provided that the following conditions are
1377
+ met:
1378
+ * Redistributions of source code must retain the above copyright
1379
+ notice, this list of conditions, and the following disclaimer.
1380
+ * Redistributions in binary form must reproduce the above
1381
+ copyright notice, this list of conditions, and the following
1382
+ disclaimer in the documentation and/or other materials provided
1383
+ with the distribution.
1384
+ * Neither the name of D. E. Shaw Research nor the names of its
1385
+ contributors may be used to endorse or promote products derived
1386
+ from this software without specific prior written permission.
1387
+
1388
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1389
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1390
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1391
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1392
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1393
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1394
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1395
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1396
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1397
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1398
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1399
+
1400
+ 14. Some of the Math library routines were written by or
1401
+ derived from code developed by Norbert Juffa and are
1402
+ subject to the following license:
1403
+
1404
+ Copyright (c) 2015-2017, Norbert Juffa
1405
+ All rights reserved.
1406
+
1407
+ Redistribution and use in source and binary forms, with or without
1408
+ modification, are permitted provided that the following conditions
1409
+ are met:
1410
+
1411
+ 1. Redistributions of source code must retain the above copyright
1412
+ notice, this list of conditions and the following disclaimer.
1413
+
1414
+ 2. Redistributions in binary form must reproduce the above copyright
1415
+ notice, this list of conditions and the following disclaimer in the
1416
+ documentation and/or other materials provided with the distribution.
1417
+
1418
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1419
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1420
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1421
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1422
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1423
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1424
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1425
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1426
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1427
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1428
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1429
+
1430
+ 15. Licensee's use of the lz4 third party component is
1431
+ subject to the following terms and conditions:
1432
+
1433
+ Copyright (C) 2011-2013, Yann Collet.
1434
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1435
+
1436
+ Redistribution and use in source and binary forms, with or without
1437
+ modification, are permitted provided that the following conditions are
1438
+ met:
1439
+
1440
+ * Redistributions of source code must retain the above copyright
1441
+ notice, this list of conditions and the following disclaimer.
1442
+ * Redistributions in binary form must reproduce the above
1443
+ copyright notice, this list of conditions and the following disclaimer
1444
+ in the documentation and/or other materials provided with the
1445
+ distribution.
1446
+
1447
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1448
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1449
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1450
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1451
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1452
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1453
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1454
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1455
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1456
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1457
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1458
+
1459
+ 16. The NPP library uses code from the Boost Math Toolkit,
1460
+ and is subject to the following license:
1461
+
1462
+ Boost Software License - Version 1.0 - August 17th, 2003
1463
+ . . . .
1464
+
1465
+ Permission is hereby granted, free of charge, to any person or
1466
+ organization obtaining a copy of the software and accompanying
1467
+ documentation covered by this license (the "Software") to use,
1468
+ reproduce, display, distribute, execute, and transmit the Software,
1469
+ and to prepare derivative works of the Software, and to permit
1470
+ third-parties to whom the Software is furnished to do so, all
1471
+ subject to the following:
1472
+
1473
+ The copyright notices in the Software and this entire statement,
1474
+ including the above license grant, this restriction and the following
1475
+ disclaimer, must be included in all copies of the Software, in whole
1476
+ or in part, and all derivative works of the Software, unless such
1477
+ copies or derivative works are solely in the form of machine-executable
1478
+ object code generated by a source language processor.
1479
+
1480
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1481
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1482
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
1483
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
1484
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
1485
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
1486
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
1487
+ OTHER DEALINGS IN THE SOFTWARE.
1488
+
1489
+ 17. Portions of the Nsight Eclipse Edition is subject to the
1490
+ following license:
1491
+
1492
+ The Eclipse Foundation makes available all content in this plug-in
1493
+ ("Content"). Unless otherwise indicated below, the Content is provided
1494
+ to you under the terms and conditions of the Eclipse Public License
1495
+ Version 1.0 ("EPL"). A copy of the EPL is available at http://
1496
+ www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
1497
+ will mean the Content.
1498
+
1499
+ If you did not receive this Content directly from the Eclipse
1500
+ Foundation, the Content is being redistributed by another party
1501
+ ("Redistributor") and different terms and conditions may apply to your
1502
+ use of any object code in the Content. Check the Redistributor's
1503
+ license that was provided with the Content. If no such license exists,
1504
+ contact the Redistributor. Unless otherwise indicated below, the terms
1505
+ and conditions of the EPL still apply to any source code in the
1506
+ Content and such source code may be obtained at http://www.eclipse.org.
1507
+
1508
+ 18. Some of the cuBLAS library routines uses code from
1509
+ OpenAI, which is subject to the following license:
1510
+
1511
+ License URL
1512
+ https://github.com/openai/openai-gemm/blob/master/LICENSE
1513
+
1514
+ License Text
1515
+ The MIT License
1516
+
1517
+ Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
1518
+
1519
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1520
+ of this software and associated documentation files (the "Software"), to deal
1521
+ in the Software without restriction, including without limitation the rights
1522
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1523
+ copies of the Software, and to permit persons to whom the Software is
1524
+ furnished to do so, subject to the following conditions:
1525
+
1526
+ The above copyright notice and this permission notice shall be included in
1527
+ all copies or substantial portions of the Software.
1528
+
1529
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1530
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1531
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1532
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1533
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1534
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
1535
+ THE SOFTWARE.
1536
+
1537
+ 19. Licensee's use of the Visual Studio Setup Configuration
1538
+ Samples is subject to the following license:
1539
+
1540
+ The MIT License (MIT)
1541
+ Copyright (C) Microsoft Corporation. All rights reserved.
1542
+
1543
+ Permission is hereby granted, free of charge, to any person
1544
+ obtaining a copy of this software and associated documentation
1545
+ files (the "Software"), to deal in the Software without restriction,
1546
+ including without limitation the rights to use, copy, modify, merge,
1547
+ publish, distribute, sublicense, and/or sell copies of the Software,
1548
+ and to permit persons to whom the Software is furnished to do so,
1549
+ subject to the following conditions:
1550
+
1551
+ The above copyright notice and this permission notice shall be included
1552
+ in all copies or substantial portions of the Software.
1553
+
1554
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
1555
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1556
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1557
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1558
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1559
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1560
+
1561
+ 20. Licensee's use of linmath.h header for CPU functions for
1562
+ GL vector/matrix operations from lunarG is subject to the
1563
+ Apache License Version 2.0.
1564
+
1565
+ 21. The DX12-CUDA sample uses the d3dx12.h header, which is
1566
+ subject to the MIT license .
1567
+
1568
+ -----------------
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cuda_runtime_cu11-11.8.89.dist-info/License.txt ADDED
@@ -0,0 +1,1568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ End User License Agreement
2
+ --------------------------
3
+
4
+
5
+ Preface
6
+ -------
7
+
8
+ The Software License Agreement in Chapter 1 and the Supplement
9
+ in Chapter 2 contain license terms and conditions that govern
10
+ the use of NVIDIA software. By accepting this agreement, you
11
+ agree to comply with all the terms and conditions applicable
12
+ to the product(s) included herein.
13
+
14
+
15
+ NVIDIA Driver
16
+
17
+
18
+ Description
19
+
20
+ This package contains the operating system driver and
21
+ fundamental system software components for NVIDIA GPUs.
22
+
23
+
24
+ NVIDIA CUDA Toolkit
25
+
26
+
27
+ Description
28
+
29
+ The NVIDIA CUDA Toolkit provides command-line and graphical
30
+ tools for building, debugging and optimizing the performance
31
+ of applications accelerated by NVIDIA GPUs, runtime and math
32
+ libraries, and documentation including programming guides,
33
+ user manuals, and API references.
34
+
35
+
36
+ Default Install Location of CUDA Toolkit
37
+
38
+ Windows platform:
39
+
40
+ %ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
41
+
42
+ Linux platform:
43
+
44
+ /usr/local/cuda-#.#
45
+
46
+ Mac platform:
47
+
48
+ /Developer/NVIDIA/CUDA-#.#
49
+
50
+
51
+ NVIDIA CUDA Samples
52
+
53
+
54
+ Description
55
+
56
+ This package includes over 100+ CUDA examples that demonstrate
57
+ various CUDA programming principles, and efficient CUDA
58
+ implementation of algorithms in specific application domains.
59
+
60
+
61
+ Default Install Location of CUDA Samples
62
+
63
+ Windows platform:
64
+
65
+ %ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
66
+
67
+ Linux platform:
68
+
69
+ /usr/local/cuda-#.#/samples
70
+
71
+ and
72
+
73
+ $HOME/NVIDIA_CUDA-#.#_Samples
74
+
75
+ Mac platform:
76
+
77
+ /Developer/NVIDIA/CUDA-#.#/samples
78
+
79
+
80
+ NVIDIA Nsight Visual Studio Edition (Windows only)
81
+
82
+
83
+ Description
84
+
85
+ NVIDIA Nsight Development Platform, Visual Studio Edition is a
86
+ development environment integrated into Microsoft Visual
87
+ Studio that provides tools for debugging, profiling, analyzing
88
+ and optimizing your GPU computing and graphics applications.
89
+
90
+
91
+ Default Install Location of Nsight Visual Studio Edition
92
+
93
+ Windows platform:
94
+
95
+ %ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
96
+
97
+
98
+ 1. License Agreement for NVIDIA Software Development Kits
99
+ ---------------------------------------------------------
100
+
101
+
102
+ Release Date: July 26, 2018
103
+ ---------------------------
104
+
105
+
106
 + Important Notice—Read before downloading, installing,
107
+ copying or using the licensed software:
108
+ -------------------------------------------------------
109
+
110
+ This license agreement, including exhibits attached
111
+ ("Agreement”) is a legal agreement between you and NVIDIA
112
+ Corporation ("NVIDIA") and governs your use of a NVIDIA
113
+ software development kit (“SDK”).
114
+
115
+ Each SDK has its own set of software and materials, but here
116
+ is a description of the types of items that may be included in
117
+ a SDK: source code, header files, APIs, data sets and assets
118
+ (examples include images, textures, models, scenes, videos,
119
+ native API input/output files), binary software, sample code,
120
+ libraries, utility programs, programming code and
121
+ documentation.
122
+
123
+ This Agreement can be accepted only by an adult of legal age
124
+ of majority in the country in which the SDK is used.
125
+
126
+ If you are entering into this Agreement on behalf of a company
127
+ or other legal entity, you represent that you have the legal
128
+ authority to bind the entity to this Agreement, in which case
129
+ “you” will mean the entity you represent.
130
+
131
+ If you don’t have the required age or authority to accept
132
+ this Agreement, or if you don’t accept all the terms and
133
+ conditions of this Agreement, do not download, install or use
134
+ the SDK.
135
+
136
+ You agree to use the SDK only for purposes that are permitted
137
+ by (a) this Agreement, and (b) any applicable law, regulation
138
+ or generally accepted practices or guidelines in the relevant
139
+ jurisdictions.
140
+
141
+
142
+ 1.1. License
143
+
144
+
145
+ 1.1.1. License Grant
146
+
147
+ Subject to the terms of this Agreement, NVIDIA hereby grants
148
+ you a non-exclusive, non-transferable license, without the
149
+ right to sublicense (except as expressly provided in this
150
+ Agreement) to:
151
+
152
+ 1. Install and use the SDK,
153
+
154
+ 2. Modify and create derivative works of sample source code
155
+ delivered in the SDK, and
156
+
157
+ 3. Distribute those portions of the SDK that are identified
158
+ in this Agreement as distributable, as incorporated in
159
+ object code format into a software application that meets
160
+ the distribution requirements indicated in this Agreement.
161
+
162
+
163
+ 1.1.2. Distribution Requirements
164
+
165
+ These are the distribution requirements for you to exercise
166
+ the distribution grant:
167
+
168
+ 1. Your application must have material additional
169
+ functionality, beyond the included portions of the SDK.
170
+
171
+ 2. The distributable portions of the SDK shall only be
172
+ accessed by your application.
173
+
174
+ 3. The following notice shall be included in modifications
175
+ and derivative works of sample source code distributed:
176
+ “This software contains source code provided by NVIDIA
177
+ Corporation.”
178
+
179
+ 4. Unless a developer tool is identified in this Agreement
180
+ as distributable, it is delivered for your internal use
181
+ only.
182
+
183
+ 5. The terms under which you distribute your application
184
+ must be consistent with the terms of this Agreement,
185
+ including (without limitation) terms relating to the
186
+ license grant and license restrictions and protection of
187
+ NVIDIA’s intellectual property rights. Additionally, you
188
+ agree that you will protect the privacy, security and
189
+ legal rights of your application users.
190
+
191
+ 6. You agree to notify NVIDIA in writing of any known or
192
+ suspected distribution or use of the SDK not in compliance
193
+ with the requirements of this Agreement, and to enforce
194
+ the terms of your agreements with respect to distributed
195
+ SDK.
196
+
197
+
198
+ 1.1.3. Authorized Users
199
+
200
+ You may allow employees and contractors of your entity or of
201
+ your subsidiary(ies) to access and use the SDK from your
202
+ secure network to perform work on your behalf.
203
+
204
+ If you are an academic institution you may allow users
205
+ enrolled or employed by the academic institution to access and
206
+ use the SDK from your secure network.
207
+
208
+ You are responsible for the compliance with the terms of this
209
+ Agreement by your authorized users. If you become aware that
210
+ your authorized users didn’t follow the terms of this
211
+ Agreement, you agree to take reasonable steps to resolve the
212
+ non-compliance and prevent new occurrences.
213
+
214
+
215
+ 1.1.4. Pre-Release SDK
216
+
217
+ The SDK versions identified as alpha, beta, preview or
218
+ otherwise as pre-release, may not be fully functional, may
219
+ contain errors or design flaws, and may have reduced or
220
+ different security, privacy, accessibility, availability, and
221
+ reliability standards relative to commercial versions of
222
+ NVIDIA software and materials. Use of a pre-release SDK may
223
+ result in unexpected results, loss of data, project delays or
224
+ other unpredictable damage or loss.
225
+
226
+ You may use a pre-release SDK at your own risk, understanding
227
+ that pre-release SDKs are not intended for use in production
228
+ or business-critical systems.
229
+
230
+ NVIDIA may choose not to make available a commercial version
231
+ of any pre-release SDK. NVIDIA may also choose to abandon
232
+ development and terminate the availability of a pre-release
233
+ SDK at any time without liability.
234
+
235
+
236
+ 1.1.5. Updates
237
+
238
+ NVIDIA may, at its option, make available patches, workarounds
239
+ or other updates to this SDK. Unless the updates are provided
240
+ with their separate governing terms, they are deemed part of
241
+ the SDK licensed to you as provided in this Agreement. You
242
+ agree that the form and content of the SDK that NVIDIA
243
+ provides may change without prior notice to you. While NVIDIA
244
+ generally maintains compatibility between versions, NVIDIA may
245
+ in some cases make changes that introduce incompatibilities in
246
+ future versions of the SDK.
247
+
248
+
249
+ 1.1.6. Third Party Licenses
250
+
251
+ The SDK may come bundled with, or otherwise include or be
252
+ distributed with, third party software licensed by a NVIDIA
253
+ supplier and/or open source software provided under an open
254
+ source license. Use of third party software is subject to the
255
+ third-party license terms, or in the absence of third party
256
+ terms, the terms of this Agreement. Copyright to third party
257
+ software is held by the copyright holders indicated in the
258
+ third-party software or license.
259
+
260
+
261
+ 1.1.7. Reservation of Rights
262
+
263
+ NVIDIA reserves all rights, title, and interest in and to the
264
+ SDK, not expressly granted to you under this Agreement.
265
+
266
+
267
+ 1.2. Limitations
268
+
269
+ The following license limitations apply to your use of the
270
+ SDK:
271
+
272
+ 1. You may not reverse engineer, decompile or disassemble,
273
+ or remove copyright or other proprietary notices from any
274
+ portion of the SDK or copies of the SDK.
275
+
276
+ 2. Except as expressly provided in this Agreement, you may
277
+ not copy, sell, rent, sublicense, transfer, distribute,
278
+ modify, or create derivative works of any portion of the
279
+ SDK. For clarity, you may not distribute or sublicense the
280
+ SDK as a stand-alone product.
281
+
282
+ 3. Unless you have an agreement with NVIDIA for this
283
+ purpose, you may not indicate that an application created
284
+ with the SDK is sponsored or endorsed by NVIDIA.
285
+
286
+ 4. You may not bypass, disable, or circumvent any
287
+ encryption, security, digital rights management or
288
+ authentication mechanism in the SDK.
289
+
290
+ 5. You may not use the SDK in any manner that would cause it
291
+ to become subject to an open source software license. As
292
+ examples, licenses that require as a condition of use,
293
+ modification, and/or distribution that the SDK be:
294
+
295
+ a. Disclosed or distributed in source code form;
296
+
297
+ b. Licensed for the purpose of making derivative works;
298
+ or
299
+
300
+ c. Redistributable at no charge.
301
+
302
+ 6. Unless you have an agreement with NVIDIA for this
303
+ purpose, you may not use the SDK with any system or
304
+ application where the use or failure of the system or
305
+ application can reasonably be expected to threaten or
306
+ result in personal injury, death, or catastrophic loss.
307
+ Examples include use in avionics, navigation, military,
308
+ medical, life support or other life critical applications.
309
+ NVIDIA does not design, test or manufacture the SDK for
310
+ these critical uses and NVIDIA shall not be liable to you
311
+ or any third party, in whole or in part, for any claims or
312
+ damages arising from such uses.
313
+
314
+ 7. You agree to defend, indemnify and hold harmless NVIDIA
315
+ and its affiliates, and their respective employees,
316
+ contractors, agents, officers and directors, from and
317
+ against any and all claims, damages, obligations, losses,
318
+ liabilities, costs or debt, fines, restitutions and
319
+ expenses (including but not limited to attorney’s fees
320
+ and costs incident to establishing the right of
321
+ indemnification) arising out of or related to your use of
322
+ the SDK outside of the scope of this Agreement, or not in
323
+ compliance with its terms.
324
+
325
+
326
+ 1.3. Ownership
327
+
328
+ 1. NVIDIA or its licensors hold all rights, title and
329
+ interest in and to the SDK and its modifications and
330
+ derivative works, including their respective intellectual
331
+ property rights, subject to your rights described in this
332
+ section. This SDK may include software and materials from
333
+ NVIDIA’s licensors, and these licensors are intended
334
+ third party beneficiaries that may enforce this Agreement
335
+ with respect to their intellectual property rights.
336
+
337
+ 2. You hold all rights, title and interest in and to your
338
+ applications and your derivative works of the sample
339
+ source code delivered in the SDK, including their
340
+ respective intellectual property rights, subject to
341
+ NVIDIA’s rights described in this section.
342
+
343
+ 3. You may, but don’t have to, provide to NVIDIA
344
+ suggestions, feature requests or other feedback regarding
345
+ the SDK, including possible enhancements or modifications
346
+ to the SDK. For any feedback that you voluntarily provide,
347
+ you hereby grant NVIDIA and its affiliates a perpetual,
348
+ non-exclusive, worldwide, irrevocable license to use,
349
+ reproduce, modify, license, sublicense (through multiple
350
+ tiers of sublicensees), and distribute (through multiple
351
+ tiers of distributors) it without the payment of any
352
+ royalties or fees to you. NVIDIA will use feedback at its
353
+ choice. NVIDIA is constantly looking for ways to improve
354
+ its products, so you may send feedback to NVIDIA through
355
+ the developer portal at https://developer.nvidia.com.
356
+
357
+
358
+ 1.4. No Warranties
359
+
360
+ THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
361
+ FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
362
+ ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
363
+ OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
364
+ BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
365
+ FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
366
+ ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
367
+ WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
368
+ DEALING OR COURSE OF TRADE.
369
+
370
+
371
+ 1.5. Limitation of Liability
372
+
373
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
374
+ AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
375
+ PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
376
+ OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
377
+ PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
378
+ WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
379
+ WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
380
+ OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
381
+ PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
382
+ LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
383
+ TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
384
+ AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
385
+ NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
386
+ LIMIT.
387
+
388
+ These exclusions and limitations of liability shall apply
389
+ regardless if NVIDIA or its affiliates have been advised of
390
+ the possibility of such damages, and regardless of whether a
391
+ remedy fails its essential purpose. These exclusions and
392
+ limitations of liability form an essential basis of the
393
+ bargain between the parties, and, absent any of these
394
+ exclusions or limitations of liability, the provisions of this
395
+ Agreement, including, without limitation, the economic terms,
396
+ would be substantially different.
397
+
398
+
399
+ 1.6. Termination
400
+
401
+ 1. This Agreement will continue to apply until terminated by
402
+ either you or NVIDIA as described below.
403
+
404
+ 2. If you want to terminate this Agreement, you may do so by
405
+ stopping to use the SDK.
406
+
407
+ 3. NVIDIA may, at any time, terminate this Agreement if:
408
+
409
+ a. (i) you fail to comply with any term of this
410
+ Agreement and the non-compliance is not fixed within
411
+ thirty (30) days following notice from NVIDIA (or
412
+ immediately if you violate NVIDIA’s intellectual
413
+ property rights);
414
+
415
+ b. (ii) you commence or participate in any legal
416
+ proceeding against NVIDIA with respect to the SDK; or
417
+
418
+ c. (iii) NVIDIA decides to no longer provide the SDK in
419
+ a country or, in NVIDIA’s sole discretion, the
420
+ continued use of it is no longer commercially viable.
421
+
422
+ 4. Upon any termination of this Agreement, you agree to
423
+ promptly discontinue use of the SDK and destroy all copies
424
+ in your possession or control. Your prior distributions in
425
+ accordance with this Agreement are not affected by the
426
+ termination of this Agreement. Upon written request, you
427
+ will certify in writing that you have complied with your
428
+ commitments under this section. Upon any termination of
429
+ this Agreement all provisions survive except for the
430
+ license grant provisions.
431
+
432
+
433
+ 1.7. General
434
+
435
+ If you wish to assign this Agreement or your rights and
436
+ obligations, including by merger, consolidation, dissolution
437
+ or operation of law, contact NVIDIA to ask for permission. Any
438
+ attempted assignment not approved by NVIDIA in writing shall
439
+ be void and of no effect. NVIDIA may assign, delegate or
440
+ transfer this Agreement and its rights and obligations, and if
441
+ to a non-affiliate you will be notified.
442
+
443
+ You agree to cooperate with NVIDIA and provide reasonably
444
+ requested information to verify your compliance with this
445
+ Agreement.
446
+
447
+ This Agreement will be governed in all respects by the laws of
448
+ the United States and of the State of Delaware as those laws
449
+ are applied to contracts entered into and performed entirely
450
+ within Delaware by Delaware residents, without regard to the
451
+ conflicts of laws principles. The United Nations Convention on
452
+ Contracts for the International Sale of Goods is specifically
453
+ disclaimed. You agree to all terms of this Agreement in the
454
+ English language.
455
+
456
+ The state or federal courts residing in Santa Clara County,
457
+ California shall have exclusive jurisdiction over any dispute
458
+ or claim arising out of this Agreement. Notwithstanding this,
459
+ you agree that NVIDIA shall still be allowed to apply for
460
+ injunctive remedies or an equivalent type of urgent legal
461
+ relief in any jurisdiction.
462
+
463
+ If any court of competent jurisdiction determines that any
464
+ provision of this Agreement is illegal, invalid or
465
+ unenforceable, such provision will be construed as limited to
466
+ the extent necessary to be consistent with and fully
467
+ enforceable under the law and the remaining provisions will
468
+ remain in full force and effect. Unless otherwise specified,
469
+ remedies are cumulative.
470
+
471
+ Each party acknowledges and agrees that the other is an
472
+ independent contractor in the performance of this Agreement.
473
+
474
+ The SDK has been developed entirely at private expense and is
475
+ “commercial items” consisting of “commercial computer
476
+ software” and “commercial computer software
477
+ documentation” provided with RESTRICTED RIGHTS. Use,
478
+ duplication or disclosure by the U.S. Government or a U.S.
479
+ Government subcontractor is subject to the restrictions in
480
+ this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
481
+ in subparagraphs (c)(1) and (2) of the Commercial Computer
482
+ Software - Restricted Rights clause at FAR 52.227-19, as
483
+ applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
484
+ Expressway, Santa Clara, CA 95051.
485
+
486
+ The SDK is subject to United States export laws and
487
+ regulations. You agree that you will not ship, transfer or
488
+ export the SDK into any country, or use the SDK in any manner,
489
+ prohibited by the United States Bureau of Industry and
490
+ Security or economic sanctions regulations administered by the
491
+ U.S. Department of Treasury’s Office of Foreign Assets
492
+ Control (OFAC), or any applicable export laws, restrictions or
493
+ regulations. These laws include restrictions on destinations,
494
+ end users and end use. By accepting this Agreement, you
495
+ confirm that you are not a resident or citizen of any country
496
+ currently embargoed by the U.S. and that you are not otherwise
497
+ prohibited from receiving the SDK.
498
+
499
+ Any notice delivered by NVIDIA to you under this Agreement
500
+ will be delivered via mail, email or fax. You agree that any
501
+ notices that NVIDIA sends you electronically will satisfy any
502
+ legal communication requirements. Please direct your legal
503
+ notices or other correspondence to NVIDIA Corporation, 2788
504
+ San Tomas Expressway, Santa Clara, California 95051, United
505
+ States of America, Attention: Legal Department.
506
+
507
+ This Agreement and any exhibits incorporated into this
508
+ Agreement constitute the entire agreement of the parties with
509
+ respect to the subject matter of this Agreement and supersede
510
+ all prior negotiations or documentation exchanged between the
511
+ parties relating to this SDK license. Any additional and/or
512
+ conflicting terms on documents issued by you are null, void,
513
+ and invalid. Any amendment or waiver under this Agreement
514
+ shall be in writing and signed by representatives of both
515
+ parties.
516
+
517
+
518
+ 2. CUDA Toolkit Supplement to Software License Agreement for
519
+ NVIDIA Software Development Kits
520
+ ------------------------------------------------------------
521
+
522
+
523
+ Release date: August 16, 2018
524
+ -----------------------------
525
+
526
+ The terms in this supplement govern your use of the NVIDIA
527
+ CUDA Toolkit SDK under the terms of your license agreement
528
+ (“Agreement”) as modified by this supplement. Capitalized
529
+ terms used but not defined below have the meaning assigned to
530
+ them in the Agreement.
531
+
532
+ This supplement is an exhibit to the Agreement and is
533
+ incorporated as an integral part of the Agreement. In the
534
+ event of conflict between the terms in this supplement and the
535
+ terms in the Agreement, the terms in this supplement govern.
536
+
537
+
538
+ 2.1. License Scope
539
+
540
+ The SDK is licensed for you to develop applications only for
541
+ use in systems with NVIDIA GPUs.
542
+
543
+
544
+ 2.2. Distribution
545
+
546
+ The portions of the SDK that are distributable under the
547
+ Agreement are listed in Attachment A.
548
+
549
+
550
+ 2.3. Operating Systems
551
+
552
+ Those portions of the SDK designed exclusively for use on the
553
+ Linux or FreeBSD operating systems, or other operating systems
554
+ derived from the source code to these operating systems, may
555
+ be copied and redistributed for use in accordance with this
556
+ Agreement, provided that the object code files are not
557
+ modified in any way (except for unzipping of compressed
558
+ files).
559
+
560
+
561
+ 2.4. Audio and Video Encoders and Decoders
562
+
563
+ You acknowledge and agree that it is your sole responsibility
564
+ to obtain any additional third-party licenses required to
565
+ make, have made, use, have used, sell, import, and offer for
566
+ sale your products or services that include or incorporate any
567
+ third-party software and content relating to audio and/or
568
+ video encoders and decoders from, including but not limited
569
+ to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
570
+ MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
571
+ under this Agreement any necessary patent or other rights with
572
+ respect to any audio and/or video encoders and decoders.
573
+
574
+
575
+ 2.5. Licensing
576
+
577
+ If the distribution terms in this Agreement are not suitable
578
+ for your organization, or for any questions regarding this
579
+ Agreement, please contact NVIDIA at
580
+ nvidia-compute-license-questions@nvidia.com.
581
+
582
+
583
+ 2.6. Attachment A
584
+
585
+ The following portions of the SDK are distributable under the
586
+ Agreement:
587
+
588
+ Component
589
+
590
+ CUDA Runtime
591
+
592
+ Windows
593
+
594
+ cudart.dll, cudart_static.lib, cudadevrt.lib
595
+
596
+ Mac OSX
597
+
598
+ libcudart.dylib, libcudart_static.a, libcudadevrt.a
599
+
600
+ Linux
601
+
602
+ libcudart.so, libcudart_static.a, libcudadevrt.a
603
+
604
+ Android
605
+
606
+ libcudart.so, libcudart_static.a, libcudadevrt.a
607
+
608
+ Component
609
+
610
+ CUDA FFT Library
611
+
612
+ Windows
613
+
614
+ cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
615
+
616
+ Mac OSX
617
+
618
+ libcufft.dylib, libcufft_static.a, libcufftw.dylib,
619
+ libcufftw_static.a
620
+
621
+ Linux
622
+
623
+ libcufft.so, libcufft_static.a, libcufftw.so,
624
+ libcufftw_static.a
625
+
626
+ Android
627
+
628
+ libcufft.so, libcufft_static.a, libcufftw.so,
629
+ libcufftw_static.a
630
+
631
+ Component
632
+
633
+ CUDA BLAS Library
634
+
635
+ Windows
636
+
637
+ cublas.dll, cublasLt.dll
638
+
639
+ Mac OSX
640
+
641
+ libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
642
+ libcublasLt_static.a
643
+
644
+ Linux
645
+
646
+ libcublas.so, libcublasLt.so, libcublas_static.a,
647
+ libcublasLt_static.a
648
+
649
+ Android
650
+
651
+ libcublas.so, libcublasLt.so, libcublas_static.a,
652
+ libcublasLt_static.a
653
+
654
+ Component
655
+
656
+ NVIDIA "Drop-in" BLAS Library
657
+
658
+ Windows
659
+
660
+ nvblas.dll
661
+
662
+ Mac OSX
663
+
664
+ libnvblas.dylib
665
+
666
+ Linux
667
+
668
+ libnvblas.so
669
+
670
+ Component
671
+
672
+ CUDA Sparse Matrix Library
673
+
674
+ Windows
675
+
676
+ cusparse.dll, cusparse.lib
677
+
678
+ Mac OSX
679
+
680
+ libcusparse.dylib, libcusparse_static.a
681
+
682
+ Linux
683
+
684
+ libcusparse.so, libcusparse_static.a
685
+
686
+ Android
687
+
688
+ libcusparse.so, libcusparse_static.a
689
+
690
+ Component
691
+
692
+ CUDA Linear Solver Library
693
+
694
+ Windows
695
+
696
+ cusolver.dll, cusolver.lib
697
+
698
+ Mac OSX
699
+
700
+ libcusolver.dylib, libcusolver_static.a
701
+
702
+ Linux
703
+
704
+ libcusolver.so, libcusolver_static.a
705
+
706
+ Android
707
+
708
+ libcusolver.so, libcusolver_static.a
709
+
710
+ Component
711
+
712
+ CUDA Random Number Generation Library
713
+
714
+ Windows
715
+
716
+ curand.dll, curand.lib
717
+
718
+ Mac OSX
719
+
720
+ libcurand.dylib, libcurand_static.a
721
+
722
+ Linux
723
+
724
+ libcurand.so, libcurand_static.a
725
+
726
+ Android
727
+
728
+ libcurand.so, libcurand_static.a
729
+
730
+ Component
731
+
732
+ CUDA Accelerated Graph Library
733
+
734
+ Component
735
+
736
+ NVIDIA Performance Primitives Library
737
+
738
+ Windows
739
+
740
+ nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
741
+ nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
742
+ nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
743
+ nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
744
+ nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
745
+
746
+ Mac OSX
747
+
748
+ libnppc.dylib, libnppc_static.a, libnppial.dylib,
749
+ libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
750
+ libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
751
+ libnppidei_static.a, libnppif.dylib, libnppif_static.a,
752
+ libnppig.dylib, libnppig_static.a, libnppim.dylib,
753
+ libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
754
+ libnpps.dylib, libnpps_static.a
755
+
756
+ Linux
757
+
758
+ libnppc.so, libnppc_static.a, libnppial.so,
759
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
760
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
761
+ libnppidei_static.a, libnppif.so, libnppif_static.a
762
+ libnppig.so, libnppig_static.a, libnppim.so,
763
+ libnppim_static.a, libnppist.so, libnppist_static.a,
764
+ libnppisu.so, libnppisu_static.a, libnppitc.so
765
+ libnppitc_static.a, libnpps.so, libnpps_static.a
766
+
767
+ Android
768
+
769
+ libnppc.so, libnppc_static.a, libnppial.so,
770
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
771
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
772
+ libnppidei_static.a, libnppif.so, libnppif_static.a
773
+ libnppig.so, libnppig_static.a, libnppim.so,
774
+ libnppim_static.a, libnppist.so, libnppist_static.a,
775
+ libnppisu.so, libnppisu_static.a, libnppitc.so
776
+ libnppitc_static.a, libnpps.so, libnpps_static.a
777
+
778
+ Component
779
+
780
+ NVIDIA JPEG Library
781
+
782
+ Linux
783
+
784
+ libnvjpeg.so, libnvjpeg_static.a
785
+
786
+ Component
787
+
788
+ Internal common library required for statically linking to
789
+ cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
790
+
791
+ Mac OSX
792
+
793
+ libculibos.a
794
+
795
+ Linux
796
+
797
+ libculibos.a
798
+
799
+ Component
800
+
801
+ NVIDIA Runtime Compilation Library and Header
802
+
803
+ All
804
+
805
+ nvrtc.h
806
+
807
+ Windows
808
+
809
+ nvrtc.dll, nvrtc-builtins.dll
810
+
811
+ Mac OSX
812
+
813
+ libnvrtc.dylib, libnvrtc-builtins.dylib
814
+
815
+ Linux
816
+
817
+ libnvrtc.so, libnvrtc-builtins.so
818
+
819
+ Component
820
+
821
+ NVIDIA Optimizing Compiler Library
822
+
823
+ Windows
824
+
825
+ nvvm.dll
826
+
827
+ Mac OSX
828
+
829
+ libnvvm.dylib
830
+
831
+ Linux
832
+
833
+ libnvvm.so
834
+
835
+ Component
836
+
837
+ NVIDIA Common Device Math Functions Library
838
+
839
+ Windows
840
+
841
+ libdevice.10.bc
842
+
843
+ Mac OSX
844
+
845
+ libdevice.10.bc
846
+
847
+ Linux
848
+
849
+ libdevice.10.bc
850
+
851
+ Component
852
+
853
+ CUDA Occupancy Calculation Header Library
854
+
855
+ All
856
+
857
+ cuda_occupancy.h
858
+
859
+ Component
860
+
861
+ CUDA Half Precision Headers
862
+
863
+ All
864
+
865
+ cuda_fp16.h, cuda_fp16.hpp
866
+
867
+ Component
868
+
869
+ CUDA Profiling Tools Interface (CUPTI) Library
870
+
871
+ Windows
872
+
873
+ cupti.dll
874
+
875
+ Mac OSX
876
+
877
+ libcupti.dylib
878
+
879
+ Linux
880
+
881
+ libcupti.so
882
+
883
+ Component
884
+
885
+ NVIDIA Tools Extension Library
886
+
887
+ Windows
888
+
889
+ nvToolsExt.dll, nvToolsExt.lib
890
+
891
+ Mac OSX
892
+
893
+ libnvToolsExt.dylib
894
+
895
+ Linux
896
+
897
+ libnvToolsExt.so
898
+
899
+ Component
900
+
901
+ NVIDIA CUDA Driver Libraries
902
+
903
+ Linux
904
+
905
+ libcuda.so, libnvidia-fatbinaryloader.so,
906
+ libnvidia-ptxjitcompiler.so
907
+
908
+ The NVIDIA CUDA Driver Libraries are only distributable in
909
+ applications that meet this criteria:
910
+
911
+ 1. The application was developed starting from a NVIDIA CUDA
912
+ container obtained from Docker Hub or the NVIDIA GPU
913
+ Cloud, and
914
+
915
+ 2. The resulting application is packaged as a Docker
916
+ container and distributed to users on Docker Hub or the
917
+ NVIDIA GPU Cloud only.
918
+
919
+
920
+ 2.7. Attachment B
921
+
922
+
923
+ Additional Licensing Obligations
924
+
925
+ The following third party components included in the SOFTWARE
926
+ are licensed to Licensee pursuant to the following terms and
927
+ conditions:
928
+
929
+ 1. Licensee's use of the GDB third party component is
930
+ subject to the terms and conditions of GNU GPL v3:
931
+
932
+ This product includes copyrighted third-party software licensed
933
+ under the terms of the GNU General Public License v3 ("GPL v3").
934
+ All third-party software packages are copyright by their respective
935
+ authors. GPL v3 terms and conditions are hereby incorporated into
936
+ the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt
937
+
938
+ Consistent with these licensing requirements, the software
939
+ listed below is provided under the terms of the specified
940
+ open source software licenses. To obtain source code for
941
+ software provided under licenses that require
942
+ redistribution of source code, including the GNU General
943
+ Public License (GPL) and GNU Lesser General Public License
944
+ (LGPL), contact oss-requests@nvidia.com. This offer is
945
+ valid for a period of three (3) years from the date of the
946
+ distribution of this product by NVIDIA CORPORATION.
947
+
948
+ Component License
949
+ CUDA-GDB GPL v3
950
+
951
+ 2. Licensee represents and warrants that any and all third
952
+ party licensing and/or royalty payment obligations in
953
+ connection with Licensee's use of the H.264 video codecs
954
+ are solely the responsibility of Licensee.
955
+
956
+ 3. Licensee's use of the Thrust library is subject to the
957
+ terms and conditions of the Apache License Version 2.0.
958
+ All third-party software packages are copyright by their
959
+ respective authors. Apache License Version 2.0 terms and
960
+ conditions are hereby incorporated into the Agreement by
961
+ this reference.
962
+ http://www.apache.org/licenses/LICENSE-2.0.html
963
+
964
+ In addition, Licensee acknowledges the following notice:
965
+ Thrust includes source code from the Boost Iterator,
966
+ Tuple, System, and Random Number libraries.
967
+
968
+ Boost Software License - Version 1.0 - August 17th, 2003
969
+ . . . .
970
+
971
+ Permission is hereby granted, free of charge, to any person or
972
+ organization obtaining a copy of the software and accompanying
973
+ documentation covered by this license (the "Software") to use,
974
+ reproduce, display, distribute, execute, and transmit the Software,
975
+ and to prepare derivative works of the Software, and to permit
976
+ third-parties to whom the Software is furnished to do so, all
977
+ subject to the following:
978
+
979
+ The copyright notices in the Software and this entire statement,
980
+ including the above license grant, this restriction and the following
981
+ disclaimer, must be included in all copies of the Software, in whole
982
+ or in part, and all derivative works of the Software, unless such
983
+ copies or derivative works are solely in the form of machine-executable
984
+ object code generated by a source language processor.
985
+
986
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
987
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
988
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
989
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
990
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
991
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
992
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
993
+ OTHER DEALINGS IN THE SOFTWARE.
994
+
995
+ 4. Licensee's use of the LLVM third party component is
996
+ subject to the following terms and conditions:
997
+
998
+ ======================================================
999
+ LLVM Release License
1000
+ ======================================================
1001
+ University of Illinois/NCSA
1002
+ Open Source License
1003
+
1004
+ Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
1005
+ All rights reserved.
1006
+
1007
+ Developed by:
1008
+
1009
+ LLVM Team
1010
+
1011
+ University of Illinois at Urbana-Champaign
1012
+
1013
+ http://llvm.org
1014
+
1015
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1016
+ of this software and associated documentation files (the "Software"), to
1017
+ deal with the Software without restriction, including without limitation the
1018
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
1019
+ sell copies of the Software, and to permit persons to whom the Software is
1020
+ furnished to do so, subject to the following conditions:
1021
+
1022
+ * Redistributions of source code must retain the above copyright notice,
1023
+ this list of conditions and the following disclaimers.
1024
+
1025
+ * Redistributions in binary form must reproduce the above copyright
1026
+ notice, this list of conditions and the following disclaimers in the
1027
+ documentation and/or other materials provided with the distribution.
1028
+
1029
+ * Neither the names of the LLVM Team, University of Illinois at Urbana-
1030
+ Champaign, nor the names of its contributors may be used to endorse or
1031
+ promote products derived from this Software without specific prior
1032
+ written permission.
1033
+
1034
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1035
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1036
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1037
+ THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
1038
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1039
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1040
+ DEALINGS WITH THE SOFTWARE.
1041
+
1042
+ 5. Licensee's use (e.g. nvprof) of the PCRE third party
1043
+ component is subject to the following terms and
1044
+ conditions:
1045
+
1046
+ ------------
1047
+ PCRE LICENCE
1048
+ ------------
1049
+ PCRE is a library of functions to support regular expressions whose syntax
1050
+ and semantics are as close as possible to those of the Perl 5 language.
1051
+ Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
1052
+ specified below. The documentation for PCRE, supplied in the "doc"
1053
+ directory, is distributed under the same terms as the software itself. The
1054
+ basic library functions are written in C and are freestanding. Also
1055
+ included in the distribution is a set of C++ wrapper functions, and a just-
1056
+ in-time compiler that can be used to optimize pattern matching. These are
1057
+ both optional features that can be omitted when the library is built.
1058
+
1059
+ THE BASIC LIBRARY FUNCTIONS
1060
+ ---------------------------
1061
+ Written by: Philip Hazel
1062
+ Email local part: ph10
1063
+ Email domain: cam.ac.uk
1064
+ University of Cambridge Computing Service,
1065
+ Cambridge, England.
1066
+ Copyright (c) 1997-2012 University of Cambridge
1067
+ All rights reserved.
1068
+
1069
+ PCRE JUST-IN-TIME COMPILATION SUPPORT
1070
+ -------------------------------------
1071
+ Written by: Zoltan Herczeg
1072
+ Email local part: hzmester
1073
+ Email domain:        freemail.hu
1074
+ Copyright(c) 2010-2012 Zoltan Herczeg
1075
+ All rights reserved.
1076
+
1077
+ STACK-LESS JUST-IN-TIME COMPILER
1078
+ --------------------------------
1079
+ Written by: Zoltan Herczeg
1080
+ Email local part: hzmester
1081
+ Email domain:        freemail.hu
1082
+ Copyright(c) 2009-2012 Zoltan Herczeg
1083
+ All rights reserved.
1084
+
1085
+ THE C++ WRAPPER FUNCTIONS
1086
+ -------------------------
1087
+ Contributed by: Google Inc.
1088
+ Copyright (c) 2007-2012, Google Inc.
1089
+ All rights reserved.
1090
+
1091
+ THE "BSD" LICENCE
1092
+ -----------------
1093
+ Redistribution and use in source and binary forms, with or without
1094
+ modification, are permitted provided that the following conditions are met:
1095
+
1096
+ * Redistributions of source code must retain the above copyright notice,
1097
+ this list of conditions and the following disclaimer.
1098
+
1099
+ * Redistributions in binary form must reproduce the above copyright
1100
+ notice, this list of conditions and the following disclaimer in the
1101
+ documentation and/or other materials provided with the distribution.
1102
+
1103
+ * Neither the name of the University of Cambridge nor the name of Google
1104
+ Inc. nor the names of their contributors may be used to endorse or
1105
+ promote products derived from this software without specific prior
1106
+ written permission.
1107
+
1108
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
1109
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1110
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1111
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
1112
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1113
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1114
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1115
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1116
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1117
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1118
+ POSSIBILITY OF SUCH DAMAGE.
1119
+
1120
+ 6. Some of the cuBLAS library routines were written by or
1121
+ derived from code written by Vasily Volkov and are subject
1122
+ to the Modified Berkeley Software Distribution License as
1123
+ follows:
1124
+
1125
+ Copyright (c) 2007-2009, Regents of the University of California
1126
+
1127
+ All rights reserved.
1128
+
1129
+ Redistribution and use in source and binary forms, with or without
1130
+ modification, are permitted provided that the following conditions are
1131
+ met:
1132
+ * Redistributions of source code must retain the above copyright
1133
+ notice, this list of conditions and the following disclaimer.
1134
+ * Redistributions in binary form must reproduce the above
1135
+ copyright notice, this list of conditions and the following
1136
+ disclaimer in the documentation and/or other materials provided
1137
+ with the distribution.
1138
+ * Neither the name of the University of California, Berkeley nor
1139
+ the names of its contributors may be used to endorse or promote
1140
+ products derived from this software without specific prior
1141
+ written permission.
1142
+
1143
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1144
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1145
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1146
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1147
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1148
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1149
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1150
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1151
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1152
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1153
+ POSSIBILITY OF SUCH DAMAGE.
1154
+
1155
+ 7. Some of the cuBLAS library routines were written by or
1156
+ derived from code written by Davide Barbieri and are
1157
+ subject to the Modified Berkeley Software Distribution
1158
+ License as follows:
1159
+
1160
+ Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
1161
+
1162
+ All rights reserved.
1163
+
1164
+ Redistribution and use in source and binary forms, with or without
1165
+ modification, are permitted provided that the following conditions are
1166
+ met:
1167
+ * Redistributions of source code must retain the above copyright
1168
+ notice, this list of conditions and the following disclaimer.
1169
+ * Redistributions in binary form must reproduce the above
1170
+ copyright notice, this list of conditions and the following
1171
+ disclaimer in the documentation and/or other materials provided
1172
+ with the distribution.
1173
+ * The name of the author may not be used to endorse or promote
1174
+ products derived from this software without specific prior
1175
+ written permission.
1176
+
1177
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1178
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1179
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1180
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1181
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1182
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1183
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1184
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1185
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1186
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1187
+ POSSIBILITY OF SUCH DAMAGE.
1188
+
1189
+ 8. Some of the cuBLAS library routines were derived from
1190
+ code developed by the University of Tennessee and are
1191
+ subject to the Modified Berkeley Software Distribution
1192
+ License as follows:
1193
+
1194
+ Copyright (c) 2010 The University of Tennessee.
1195
+
1196
+ All rights reserved.
1197
+
1198
+ Redistribution and use in source and binary forms, with or without
1199
+ modification, are permitted provided that the following conditions are
1200
+ met:
1201
+ * Redistributions of source code must retain the above copyright
1202
+ notice, this list of conditions and the following disclaimer.
1203
+ * Redistributions in binary form must reproduce the above
1204
+ copyright notice, this list of conditions and the following
1205
+ disclaimer listed in this license in the documentation and/or
1206
+ other materials provided with the distribution.
1207
+ * Neither the name of the copyright holders nor the names of its
1208
+ contributors may be used to endorse or promote products derived
1209
+ from this software without specific prior written permission.
1210
+
1211
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1212
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1213
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1214
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1215
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1216
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1217
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1218
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1219
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1220
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1221
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1222
+
1223
+ 9. Some of the cuBLAS library routines were written by or
1224
+ derived from code written by Jonathan Hogg and are subject
1225
+ to the Modified Berkeley Software Distribution License as
1226
+ follows:
1227
+
1228
+ Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
1229
+
1230
+ All rights reserved.
1231
+
1232
+ Redistribution and use in source and binary forms, with or without
1233
+ modification, are permitted provided that the following conditions are
1234
+ met:
1235
+ * Redistributions of source code must retain the above copyright
1236
+ notice, this list of conditions and the following disclaimer.
1237
+ * Redistributions in binary form must reproduce the above
1238
+ copyright notice, this list of conditions and the following
1239
+ disclaimer in the documentation and/or other materials provided
1240
+ with the distribution.
1241
+ * Neither the name of the STFC nor the names of its contributors
1242
+ may be used to endorse or promote products derived from this
1243
+ software without specific prior written permission.
1244
+
1245
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1246
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1247
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1248
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
1249
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1250
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1251
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
1252
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1253
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
1254
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
1255
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1256
+
1257
+ 10. Some of the cuBLAS library routines were written by or
1258
+ derived from code written by Ahmad M. Abdelfattah, David
1259
+ Keyes, and Hatem Ltaief, and are subject to the Apache
1260
+ License, Version 2.0, as follows:
1261
+
1262
+ -- (C) Copyright 2013 King Abdullah University of Science and Technology
1263
+ Authors:
1264
+ Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
1265
+ David Keyes (david.keyes@kaust.edu.sa)
1266
+ Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
1267
+
1268
+ Redistribution and use in source and binary forms, with or without
1269
+ modification, are permitted provided that the following conditions
1270
+ are met:
1271
+
1272
+ * Redistributions of source code must retain the above copyright
1273
+ notice, this list of conditions and the following disclaimer.
1274
+ * Redistributions in binary form must reproduce the above copyright
1275
+ notice, this list of conditions and the following disclaimer in the
1276
+ documentation and/or other materials provided with the distribution.
1277
+ * Neither the name of the King Abdullah University of Science and
1278
+ Technology nor the names of its contributors may be used to endorse
1279
+ or promote products derived from this software without specific prior
1280
+ written permission.
1281
+
1282
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1283
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1284
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1285
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1286
+ HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1287
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1288
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1289
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1290
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1291
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1292
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
1293
+
1294
+ 11. Some of the cuSPARSE library routines were written by or
1295
+ derived from code written by Li-Wen Chang and are subject
1296
+ to the NCSA Open Source License as follows:
1297
+
1298
+ Copyright (c) 2012, University of Illinois.
1299
+
1300
+ All rights reserved.
1301
+
1302
+ Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
1303
+
1304
+ Permission is hereby granted, free of charge, to any person obtaining
1305
+ a copy of this software and associated documentation files (the
1306
+ "Software"), to deal with the Software without restriction, including
1307
+ without limitation the rights to use, copy, modify, merge, publish,
1308
+ distribute, sublicense, and/or sell copies of the Software, and to
1309
+ permit persons to whom the Software is furnished to do so, subject to
1310
+ the following conditions:
1311
+ * Redistributions of source code must retain the above copyright
1312
+ notice, this list of conditions and the following disclaimer.
1313
+ * Redistributions in binary form must reproduce the above
1314
+ copyright notice, this list of conditions and the following
1315
+ disclaimers in the documentation and/or other materials provided
1316
+ with the distribution.
1317
+ * Neither the names of IMPACT Group, University of Illinois, nor
1318
+ the names of its contributors may be used to endorse or promote
1319
+ products derived from this Software without specific prior
1320
+ written permission.
1321
+
1322
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1323
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1324
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1325
+ NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
1326
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1327
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1328
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
1329
+ SOFTWARE.
1330
+
1331
+ 12. Some of the cuRAND library routines were written by or
1332
+ derived from code written by Mutsuo Saito and Makoto
1333
+ Matsumoto and are subject to the following license:
1334
+
1335
+ Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
1336
+ University. All rights reserved.
1337
+
1338
+ Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
1339
+ University and University of Tokyo. All rights reserved.
1340
+
1341
+ Redistribution and use in source and binary forms, with or without
1342
+ modification, are permitted provided that the following conditions are
1343
+ met:
1344
+ * Redistributions of source code must retain the above copyright
1345
+ notice, this list of conditions and the following disclaimer.
1346
+ * Redistributions in binary form must reproduce the above
1347
+ copyright notice, this list of conditions and the following
1348
+ disclaimer in the documentation and/or other materials provided
1349
+ with the distribution.
1350
+ * Neither the name of the Hiroshima University nor the names of
1351
+ its contributors may be used to endorse or promote products
1352
+ derived from this software without specific prior written
1353
+ permission.
1354
+
1355
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1356
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1357
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1358
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1359
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1360
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1361
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1362
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1363
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1364
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1365
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1366
+
1367
+ 13. Some of the cuRAND library routines were derived from
1368
+ code developed by D. E. Shaw Research and are subject to
1369
+ the following license:
1370
+
1371
+ Copyright 2010-2011, D. E. Shaw Research.
1372
+
1373
+ All rights reserved.
1374
+
1375
+ Redistribution and use in source and binary forms, with or without
1376
+ modification, are permitted provided that the following conditions are
1377
+ met:
1378
+ * Redistributions of source code must retain the above copyright
1379
+ notice, this list of conditions, and the following disclaimer.
1380
+ * Redistributions in binary form must reproduce the above
1381
+ copyright notice, this list of conditions, and the following
1382
+ disclaimer in the documentation and/or other materials provided
1383
+ with the distribution.
1384
+ * Neither the name of D. E. Shaw Research nor the names of its
1385
+ contributors may be used to endorse or promote products derived
1386
+ from this software without specific prior written permission.
1387
+
1388
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1389
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1390
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1391
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1392
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1393
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1394
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1395
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1396
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1397
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1398
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1399
+
1400
+ 14. Some of the Math library routines were written by or
1401
+ derived from code developed by Norbert Juffa and are
1402
+ subject to the following license:
1403
+
1404
+ Copyright (c) 2015-2017, Norbert Juffa
1405
+ All rights reserved.
1406
+
1407
+ Redistribution and use in source and binary forms, with or without
1408
+ modification, are permitted provided that the following conditions
1409
+ are met:
1410
+
1411
+ 1. Redistributions of source code must retain the above copyright
1412
+ notice, this list of conditions and the following disclaimer.
1413
+
1414
+ 2. Redistributions in binary form must reproduce the above copyright
1415
+ notice, this list of conditions and the following disclaimer in the
1416
+ documentation and/or other materials provided with the distribution.
1417
+
1418
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1419
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1420
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1421
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1422
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1423
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1424
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1425
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1426
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1427
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1428
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1429
+
1430
+ 15. Licensee's use of the lz4 third party component is
1431
+ subject to the following terms and conditions:
1432
+
1433
+ Copyright (C) 2011-2013, Yann Collet.
1434
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1435
+
1436
+ Redistribution and use in source and binary forms, with or without
1437
+ modification, are permitted provided that the following conditions are
1438
+ met:
1439
+
1440
+ * Redistributions of source code must retain the above copyright
1441
+ notice, this list of conditions and the following disclaimer.
1442
+ * Redistributions in binary form must reproduce the above
1443
+ copyright notice, this list of conditions and the following disclaimer
1444
+ in the documentation and/or other materials provided with the
1445
+ distribution.
1446
+
1447
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1448
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1449
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1450
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1451
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1452
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1453
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1454
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1455
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1456
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1457
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1458
+
1459
+ 16. The NPP library uses code from the Boost Math Toolkit,
1460
+ and is subject to the following license:
1461
+
1462
+ Boost Software License - Version 1.0 - August 17th, 2003
1463
+ . . . .
1464
+
1465
+ Permission is hereby granted, free of charge, to any person or
1466
+ organization obtaining a copy of the software and accompanying
1467
+ documentation covered by this license (the "Software") to use,
1468
+ reproduce, display, distribute, execute, and transmit the Software,
1469
+ and to prepare derivative works of the Software, and to permit
1470
+ third-parties to whom the Software is furnished to do so, all
1471
+ subject to the following:
1472
+
1473
+ The copyright notices in the Software and this entire statement,
1474
+ including the above license grant, this restriction and the following
1475
+ disclaimer, must be included in all copies of the Software, in whole
1476
+ or in part, and all derivative works of the Software, unless such
1477
+ copies or derivative works are solely in the form of machine-executable
1478
+ object code generated by a source language processor.
1479
+
1480
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1481
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1482
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
1483
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
1484
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
1485
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
1486
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
1487
+ OTHER DEALINGS IN THE SOFTWARE.
1488
+
1489
+ 17. Portions of the Nsight Eclipse Edition is subject to the
1490
+ following license:
1491
+
1492
+ The Eclipse Foundation makes available all content in this plug-in
1493
+ ("Content"). Unless otherwise indicated below, the Content is provided
1494
+ to you under the terms and conditions of the Eclipse Public License
1495
+ Version 1.0 ("EPL"). A copy of the EPL is available at http://
1496
+ www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
1497
+ will mean the Content.
1498
+
1499
+ If you did not receive this Content directly from the Eclipse
1500
+ Foundation, the Content is being redistributed by another party
1501
+ ("Redistributor") and different terms and conditions may apply to your
1502
+ use of any object code in the Content. Check the Redistributor's
1503
+ license that was provided with the Content. If no such license exists,
1504
+ contact the Redistributor. Unless otherwise indicated below, the terms
1505
+ and conditions of the EPL still apply to any source code in the
1506
+ Content and such source code may be obtained at http://www.eclipse.org.
1507
+
1508
+ 18. Some of the cuBLAS library routines uses code from
1509
+ OpenAI, which is subject to the following license:
1510
+
1511
+ License URL
1512
+ https://github.com/openai/openai-gemm/blob/master/LICENSE
1513
+
1514
+ License Text
1515
+ The MIT License
1516
+
1517
+ Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
1518
+
1519
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1520
+ of this software and associated documentation files (the "Software"), to deal
1521
+ in the Software without restriction, including without limitation the rights
1522
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1523
+ copies of the Software, and to permit persons to whom the Software is
1524
+ furnished to do so, subject to the following conditions:
1525
+
1526
+ The above copyright notice and this permission notice shall be included in
1527
+ all copies or substantial portions of the Software.
1528
+
1529
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1530
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1531
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1532
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1533
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1534
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
1535
+ THE SOFTWARE.
1536
+
1537
+ 19. Licensee's use of the Visual Studio Setup Configuration
1538
+ Samples is subject to the following license:
1539
+
1540
+ The MIT License (MIT)
1541
+ Copyright (C) Microsoft Corporation. All rights reserved.
1542
+
1543
+ Permission is hereby granted, free of charge, to any person
1544
+ obtaining a copy of this software and associated documentation
1545
+ files (the "Software"), to deal in the Software without restriction,
1546
+ including without limitation the rights to use, copy, modify, merge,
1547
+ publish, distribute, sublicense, and/or sell copies of the Software,
1548
+ and to permit persons to whom the Software is furnished to do so,
1549
+ subject to the following conditions:
1550
+
1551
+ The above copyright notice and this permission notice shall be included
1552
+ in all copies or substantial portions of the Software.
1553
+
1554
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
1555
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1556
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1557
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1558
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1559
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1560
+
1561
+ 20. Licensee's use of linmath.h header for CPU functions for
1562
+ GL vector/matrix operations from lunarG is subject to the
1563
+ Apache License Version 2.0.
1564
+
1565
+ 21. The DX12-CUDA sample uses the d3dx12.h header, which is
1566
+ subject to the MIT license .
1567
+
1568
+ -----------------
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5dc9884a8f458371550e09bd396e5418bf375820a31b9899f6499bf391c7b2e
3
+ size 168448
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distro/__main__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .distro import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (228 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-311.pyc ADDED
Binary file (12.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (245 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-311.pyc ADDED
Binary file (17 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-311.pyc ADDED
Binary file (15.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module uses ctypes to bind a whole bunch of functions and constants from
3
+ SecureTransport. The goal here is to provide the low-level API to
4
+ SecureTransport. These are essentially the C-level functions and constants, and
5
+ they're pretty gross to work with.
6
+
7
+ This code is a bastardised version of the code found in Will Bond's oscrypto
8
+ library. An enormous debt is owed to him for blazing this trail for us. For
9
+ that reason, this code should be considered to be covered both by urllib3's
10
+ license and by oscrypto's:
11
+
12
+ Copyright (c) 2015-2016 Will Bond <will@wbond.net>
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a
15
+ copy of this software and associated documentation files (the "Software"),
16
+ to deal in the Software without restriction, including without limitation
17
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
18
+ and/or sell copies of the Software, and to permit persons to whom the
19
+ Software is furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in
22
+ all copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30
+ DEALINGS IN THE SOFTWARE.
31
+ """
32
+ from __future__ import absolute_import
33
+
34
+ import platform
35
+ from ctypes import (
36
+ CDLL,
37
+ CFUNCTYPE,
38
+ POINTER,
39
+ c_bool,
40
+ c_byte,
41
+ c_char_p,
42
+ c_int32,
43
+ c_long,
44
+ c_size_t,
45
+ c_uint32,
46
+ c_ulong,
47
+ c_void_p,
48
+ )
49
+ from ctypes.util import find_library
50
+
51
+ from ...packages.six import raise_from
52
+
53
+ if platform.system() != "Darwin":
54
+ raise ImportError("Only macOS is supported")
55
+
56
+ version = platform.mac_ver()[0]
57
+ version_info = tuple(map(int, version.split(".")))
58
+ if version_info < (10, 8):
59
+ raise OSError(
60
+ "Only OS X 10.8 and newer are supported, not %s.%s"
61
+ % (version_info[0], version_info[1])
62
+ )
63
+
64
+
65
+ def load_cdll(name, macos10_16_path):
66
+ """Loads a CDLL by name, falling back to known path on 10.16+"""
67
+ try:
68
+ # Big Sur is technically 11 but we use 10.16 due to the Big Sur
69
+ # beta being labeled as 10.16.
70
+ if version_info >= (10, 16):
71
+ path = macos10_16_path
72
+ else:
73
+ path = find_library(name)
74
+ if not path:
75
+ raise OSError # Caught and reraised as 'ImportError'
76
+ return CDLL(path, use_errno=True)
77
+ except OSError:
78
+ raise_from(ImportError("The library %s failed to load" % name), None)
79
+
80
+
81
+ Security = load_cdll(
82
+ "Security", "/System/Library/Frameworks/Security.framework/Security"
83
+ )
84
+ CoreFoundation = load_cdll(
85
+ "CoreFoundation",
86
+ "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation",
87
+ )
88
+
89
+
90
+ Boolean = c_bool
91
+ CFIndex = c_long
92
+ CFStringEncoding = c_uint32
93
+ CFData = c_void_p
94
+ CFString = c_void_p
95
+ CFArray = c_void_p
96
+ CFMutableArray = c_void_p
97
+ CFDictionary = c_void_p
98
+ CFError = c_void_p
99
+ CFType = c_void_p
100
+ CFTypeID = c_ulong
101
+
102
+ CFTypeRef = POINTER(CFType)
103
+ CFAllocatorRef = c_void_p
104
+
105
+ OSStatus = c_int32
106
+
107
+ CFDataRef = POINTER(CFData)
108
+ CFStringRef = POINTER(CFString)
109
+ CFArrayRef = POINTER(CFArray)
110
+ CFMutableArrayRef = POINTER(CFMutableArray)
111
+ CFDictionaryRef = POINTER(CFDictionary)
112
+ CFArrayCallBacks = c_void_p
113
+ CFDictionaryKeyCallBacks = c_void_p
114
+ CFDictionaryValueCallBacks = c_void_p
115
+
116
+ SecCertificateRef = POINTER(c_void_p)
117
+ SecExternalFormat = c_uint32
118
+ SecExternalItemType = c_uint32
119
+ SecIdentityRef = POINTER(c_void_p)
120
+ SecItemImportExportFlags = c_uint32
121
+ SecItemImportExportKeyParameters = c_void_p
122
+ SecKeychainRef = POINTER(c_void_p)
123
+ SSLProtocol = c_uint32
124
+ SSLCipherSuite = c_uint32
125
+ SSLContextRef = POINTER(c_void_p)
126
+ SecTrustRef = POINTER(c_void_p)
127
+ SSLConnectionRef = c_uint32
128
+ SecTrustResultType = c_uint32
129
+ SecTrustOptionFlags = c_uint32
130
+ SSLProtocolSide = c_uint32
131
+ SSLConnectionType = c_uint32
132
+ SSLSessionOption = c_uint32
133
+
134
+
135
+ try:
136
+ Security.SecItemImport.argtypes = [
137
+ CFDataRef,
138
+ CFStringRef,
139
+ POINTER(SecExternalFormat),
140
+ POINTER(SecExternalItemType),
141
+ SecItemImportExportFlags,
142
+ POINTER(SecItemImportExportKeyParameters),
143
+ SecKeychainRef,
144
+ POINTER(CFArrayRef),
145
+ ]
146
+ Security.SecItemImport.restype = OSStatus
147
+
148
+ Security.SecCertificateGetTypeID.argtypes = []
149
+ Security.SecCertificateGetTypeID.restype = CFTypeID
150
+
151
+ Security.SecIdentityGetTypeID.argtypes = []
152
+ Security.SecIdentityGetTypeID.restype = CFTypeID
153
+
154
+ Security.SecKeyGetTypeID.argtypes = []
155
+ Security.SecKeyGetTypeID.restype = CFTypeID
156
+
157
+ Security.SecCertificateCreateWithData.argtypes = [CFAllocatorRef, CFDataRef]
158
+ Security.SecCertificateCreateWithData.restype = SecCertificateRef
159
+
160
+ Security.SecCertificateCopyData.argtypes = [SecCertificateRef]
161
+ Security.SecCertificateCopyData.restype = CFDataRef
162
+
163
+ Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p]
164
+ Security.SecCopyErrorMessageString.restype = CFStringRef
165
+
166
+ Security.SecIdentityCreateWithCertificate.argtypes = [
167
+ CFTypeRef,
168
+ SecCertificateRef,
169
+ POINTER(SecIdentityRef),
170
+ ]
171
+ Security.SecIdentityCreateWithCertificate.restype = OSStatus
172
+
173
+ Security.SecKeychainCreate.argtypes = [
174
+ c_char_p,
175
+ c_uint32,
176
+ c_void_p,
177
+ Boolean,
178
+ c_void_p,
179
+ POINTER(SecKeychainRef),
180
+ ]
181
+ Security.SecKeychainCreate.restype = OSStatus
182
+
183
+ Security.SecKeychainDelete.argtypes = [SecKeychainRef]
184
+ Security.SecKeychainDelete.restype = OSStatus
185
+
186
+ Security.SecPKCS12Import.argtypes = [
187
+ CFDataRef,
188
+ CFDictionaryRef,
189
+ POINTER(CFArrayRef),
190
+ ]
191
+ Security.SecPKCS12Import.restype = OSStatus
192
+
193
+ SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t))
194
+ SSLWriteFunc = CFUNCTYPE(
195
+ OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t)
196
+ )
197
+
198
+ Security.SSLSetIOFuncs.argtypes = [SSLContextRef, SSLReadFunc, SSLWriteFunc]
199
+ Security.SSLSetIOFuncs.restype = OSStatus
200
+
201
+ Security.SSLSetPeerID.argtypes = [SSLContextRef, c_char_p, c_size_t]
202
+ Security.SSLSetPeerID.restype = OSStatus
203
+
204
+ Security.SSLSetCertificate.argtypes = [SSLContextRef, CFArrayRef]
205
+ Security.SSLSetCertificate.restype = OSStatus
206
+
207
+ Security.SSLSetCertificateAuthorities.argtypes = [SSLContextRef, CFTypeRef, Boolean]
208
+ Security.SSLSetCertificateAuthorities.restype = OSStatus
209
+
210
+ Security.SSLSetConnection.argtypes = [SSLContextRef, SSLConnectionRef]
211
+ Security.SSLSetConnection.restype = OSStatus
212
+
213
+ Security.SSLSetPeerDomainName.argtypes = [SSLContextRef, c_char_p, c_size_t]
214
+ Security.SSLSetPeerDomainName.restype = OSStatus
215
+
216
+ Security.SSLHandshake.argtypes = [SSLContextRef]
217
+ Security.SSLHandshake.restype = OSStatus
218
+
219
+ Security.SSLRead.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)]
220
+ Security.SSLRead.restype = OSStatus
221
+
222
+ Security.SSLWrite.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)]
223
+ Security.SSLWrite.restype = OSStatus
224
+
225
+ Security.SSLClose.argtypes = [SSLContextRef]
226
+ Security.SSLClose.restype = OSStatus
227
+
228
+ Security.SSLGetNumberSupportedCiphers.argtypes = [SSLContextRef, POINTER(c_size_t)]
229
+ Security.SSLGetNumberSupportedCiphers.restype = OSStatus
230
+
231
+ Security.SSLGetSupportedCiphers.argtypes = [
232
+ SSLContextRef,
233
+ POINTER(SSLCipherSuite),
234
+ POINTER(c_size_t),
235
+ ]
236
+ Security.SSLGetSupportedCiphers.restype = OSStatus
237
+
238
+ Security.SSLSetEnabledCiphers.argtypes = [
239
+ SSLContextRef,
240
+ POINTER(SSLCipherSuite),
241
+ c_size_t,
242
+ ]
243
+ Security.SSLSetEnabledCiphers.restype = OSStatus
244
+
245
+ Security.SSLGetNumberEnabledCiphers.argtype = [SSLContextRef, POINTER(c_size_t)]
246
+ Security.SSLGetNumberEnabledCiphers.restype = OSStatus
247
+
248
+ Security.SSLGetEnabledCiphers.argtypes = [
249
+ SSLContextRef,
250
+ POINTER(SSLCipherSuite),
251
+ POINTER(c_size_t),
252
+ ]
253
+ Security.SSLGetEnabledCiphers.restype = OSStatus
254
+
255
+ Security.SSLGetNegotiatedCipher.argtypes = [SSLContextRef, POINTER(SSLCipherSuite)]
256
+ Security.SSLGetNegotiatedCipher.restype = OSStatus
257
+
258
+ Security.SSLGetNegotiatedProtocolVersion.argtypes = [
259
+ SSLContextRef,
260
+ POINTER(SSLProtocol),
261
+ ]
262
+ Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus
263
+
264
+ Security.SSLCopyPeerTrust.argtypes = [SSLContextRef, POINTER(SecTrustRef)]
265
+ Security.SSLCopyPeerTrust.restype = OSStatus
266
+
267
+ Security.SecTrustSetAnchorCertificates.argtypes = [SecTrustRef, CFArrayRef]
268
+ Security.SecTrustSetAnchorCertificates.restype = OSStatus
269
+
270
+ Security.SecTrustSetAnchorCertificatesOnly.argstypes = [SecTrustRef, Boolean]
271
+ Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus
272
+
273
+ Security.SecTrustEvaluate.argtypes = [SecTrustRef, POINTER(SecTrustResultType)]
274
+ Security.SecTrustEvaluate.restype = OSStatus
275
+
276
+ Security.SecTrustGetCertificateCount.argtypes = [SecTrustRef]
277
+ Security.SecTrustGetCertificateCount.restype = CFIndex
278
+
279
+ Security.SecTrustGetCertificateAtIndex.argtypes = [SecTrustRef, CFIndex]
280
+ Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef
281
+
282
+ Security.SSLCreateContext.argtypes = [
283
+ CFAllocatorRef,
284
+ SSLProtocolSide,
285
+ SSLConnectionType,
286
+ ]
287
+ Security.SSLCreateContext.restype = SSLContextRef
288
+
289
+ Security.SSLSetSessionOption.argtypes = [SSLContextRef, SSLSessionOption, Boolean]
290
+ Security.SSLSetSessionOption.restype = OSStatus
291
+
292
+ Security.SSLSetProtocolVersionMin.argtypes = [SSLContextRef, SSLProtocol]
293
+ Security.SSLSetProtocolVersionMin.restype = OSStatus
294
+
295
+ Security.SSLSetProtocolVersionMax.argtypes = [SSLContextRef, SSLProtocol]
296
+ Security.SSLSetProtocolVersionMax.restype = OSStatus
297
+
298
+ try:
299
+ Security.SSLSetALPNProtocols.argtypes = [SSLContextRef, CFArrayRef]
300
+ Security.SSLSetALPNProtocols.restype = OSStatus
301
+ except AttributeError:
302
+ # Supported only in 10.12+
303
+ pass
304
+
305
+ Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p]
306
+ Security.SecCopyErrorMessageString.restype = CFStringRef
307
+
308
+ Security.SSLReadFunc = SSLReadFunc
309
+ Security.SSLWriteFunc = SSLWriteFunc
310
+ Security.SSLContextRef = SSLContextRef
311
+ Security.SSLProtocol = SSLProtocol
312
+ Security.SSLCipherSuite = SSLCipherSuite
313
+ Security.SecIdentityRef = SecIdentityRef
314
+ Security.SecKeychainRef = SecKeychainRef
315
+ Security.SecTrustRef = SecTrustRef
316
+ Security.SecTrustResultType = SecTrustResultType
317
+ Security.SecExternalFormat = SecExternalFormat
318
+ Security.OSStatus = OSStatus
319
+
320
+ Security.kSecImportExportPassphrase = CFStringRef.in_dll(
321
+ Security, "kSecImportExportPassphrase"
322
+ )
323
+ Security.kSecImportItemIdentity = CFStringRef.in_dll(
324
+ Security, "kSecImportItemIdentity"
325
+ )
326
+
327
+ # CoreFoundation time!
328
+ CoreFoundation.CFRetain.argtypes = [CFTypeRef]
329
+ CoreFoundation.CFRetain.restype = CFTypeRef
330
+
331
+ CoreFoundation.CFRelease.argtypes = [CFTypeRef]
332
+ CoreFoundation.CFRelease.restype = None
333
+
334
+ CoreFoundation.CFGetTypeID.argtypes = [CFTypeRef]
335
+ CoreFoundation.CFGetTypeID.restype = CFTypeID
336
+
337
+ CoreFoundation.CFStringCreateWithCString.argtypes = [
338
+ CFAllocatorRef,
339
+ c_char_p,
340
+ CFStringEncoding,
341
+ ]
342
+ CoreFoundation.CFStringCreateWithCString.restype = CFStringRef
343
+
344
+ CoreFoundation.CFStringGetCStringPtr.argtypes = [CFStringRef, CFStringEncoding]
345
+ CoreFoundation.CFStringGetCStringPtr.restype = c_char_p
346
+
347
+ CoreFoundation.CFStringGetCString.argtypes = [
348
+ CFStringRef,
349
+ c_char_p,
350
+ CFIndex,
351
+ CFStringEncoding,
352
+ ]
353
+ CoreFoundation.CFStringGetCString.restype = c_bool
354
+
355
+ CoreFoundation.CFDataCreate.argtypes = [CFAllocatorRef, c_char_p, CFIndex]
356
+ CoreFoundation.CFDataCreate.restype = CFDataRef
357
+
358
+ CoreFoundation.CFDataGetLength.argtypes = [CFDataRef]
359
+ CoreFoundation.CFDataGetLength.restype = CFIndex
360
+
361
+ CoreFoundation.CFDataGetBytePtr.argtypes = [CFDataRef]
362
+ CoreFoundation.CFDataGetBytePtr.restype = c_void_p
363
+
364
+ CoreFoundation.CFDictionaryCreate.argtypes = [
365
+ CFAllocatorRef,
366
+ POINTER(CFTypeRef),
367
+ POINTER(CFTypeRef),
368
+ CFIndex,
369
+ CFDictionaryKeyCallBacks,
370
+ CFDictionaryValueCallBacks,
371
+ ]
372
+ CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef
373
+
374
+ CoreFoundation.CFDictionaryGetValue.argtypes = [CFDictionaryRef, CFTypeRef]
375
+ CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef
376
+
377
+ CoreFoundation.CFArrayCreate.argtypes = [
378
+ CFAllocatorRef,
379
+ POINTER(CFTypeRef),
380
+ CFIndex,
381
+ CFArrayCallBacks,
382
+ ]
383
+ CoreFoundation.CFArrayCreate.restype = CFArrayRef
384
+
385
+ CoreFoundation.CFArrayCreateMutable.argtypes = [
386
+ CFAllocatorRef,
387
+ CFIndex,
388
+ CFArrayCallBacks,
389
+ ]
390
+ CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef
391
+
392
+ CoreFoundation.CFArrayAppendValue.argtypes = [CFMutableArrayRef, c_void_p]
393
+ CoreFoundation.CFArrayAppendValue.restype = None
394
+
395
+ CoreFoundation.CFArrayGetCount.argtypes = [CFArrayRef]
396
+ CoreFoundation.CFArrayGetCount.restype = CFIndex
397
+
398
+ CoreFoundation.CFArrayGetValueAtIndex.argtypes = [CFArrayRef, CFIndex]
399
+ CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p
400
+
401
+ CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll(
402
+ CoreFoundation, "kCFAllocatorDefault"
403
+ )
404
+ CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(
405
+ CoreFoundation, "kCFTypeArrayCallBacks"
406
+ )
407
+ CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll(
408
+ CoreFoundation, "kCFTypeDictionaryKeyCallBacks"
409
+ )
410
+ CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll(
411
+ CoreFoundation, "kCFTypeDictionaryValueCallBacks"
412
+ )
413
+
414
+ CoreFoundation.CFTypeRef = CFTypeRef
415
+ CoreFoundation.CFArrayRef = CFArrayRef
416
+ CoreFoundation.CFStringRef = CFStringRef
417
+ CoreFoundation.CFDictionaryRef = CFDictionaryRef
418
+
419
+ except (AttributeError):
420
+ raise ImportError("Error initializing ctypes")
421
+
422
+
423
+ class CFConst(object):
424
+ """
425
+ A class object that acts as essentially a namespace for CoreFoundation
426
+ constants.
427
+ """
428
+
429
+ kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
430
+
431
+
432
+ class SecurityConst(object):
433
+ """
434
+ A class object that acts as essentially a namespace for Security constants.
435
+ """
436
+
437
+ kSSLSessionOptionBreakOnServerAuth = 0
438
+
439
+ kSSLProtocol2 = 1
440
+ kSSLProtocol3 = 2
441
+ kTLSProtocol1 = 4
442
+ kTLSProtocol11 = 7
443
+ kTLSProtocol12 = 8
444
+ # SecureTransport does not support TLS 1.3 even if there's a constant for it
445
+ kTLSProtocol13 = 10
446
+ kTLSProtocolMaxSupported = 999
447
+
448
+ kSSLClientSide = 1
449
+ kSSLStreamType = 0
450
+
451
+ kSecFormatPEMSequence = 10
452
+
453
+ kSecTrustResultInvalid = 0
454
+ kSecTrustResultProceed = 1
455
+ # This gap is present on purpose: this was kSecTrustResultConfirm, which
456
+ # is deprecated.
457
+ kSecTrustResultDeny = 3
458
+ kSecTrustResultUnspecified = 4
459
+ kSecTrustResultRecoverableTrustFailure = 5
460
+ kSecTrustResultFatalTrustFailure = 6
461
+ kSecTrustResultOtherError = 7
462
+
463
+ errSSLProtocol = -9800
464
+ errSSLWouldBlock = -9803
465
+ errSSLClosedGraceful = -9805
466
+ errSSLClosedNoNotify = -9816
467
+ errSSLClosedAbort = -9806
468
+
469
+ errSSLXCertChainInvalid = -9807
470
+ errSSLCrypto = -9809
471
+ errSSLInternal = -9810
472
+ errSSLCertExpired = -9814
473
+ errSSLCertNotYetValid = -9815
474
+ errSSLUnknownRootCert = -9812
475
+ errSSLNoRootCert = -9813
476
+ errSSLHostNameMismatch = -9843
477
+ errSSLPeerHandshakeFail = -9824
478
+ errSSLPeerUserCancelled = -9839
479
+ errSSLWeakPeerEphemeralDHKey = -9850
480
+ errSSLServerAuthCompleted = -9841
481
+ errSSLRecordOverflow = -9847
482
+
483
+ errSecVerifyFailed = -67808
484
+ errSecNoTrustSettings = -25263
485
+ errSecItemNotFound = -25300
486
+ errSecInvalidTrustSettings = -25262
487
+
488
+ # Cipher suites. We only pick the ones our default cipher string allows.
489
+ # Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values
490
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
491
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
492
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
493
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
494
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9
495
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8
496
+ TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
497
+ TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
498
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
499
+ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
500
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
501
+ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
502
+ TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
503
+ TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
504
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
505
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
506
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
507
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
508
+ TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
509
+ TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
510
+ TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
511
+ TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
512
+ TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
513
+ TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
514
+ TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
515
+ TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
516
+ TLS_AES_128_GCM_SHA256 = 0x1301
517
+ TLS_AES_256_GCM_SHA384 = 0x1302
518
+ TLS_AES_128_CCM_8_SHA256 = 0x1305
519
+ TLS_AES_128_CCM_SHA256 = 0x1304
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Low-level helpers for the SecureTransport bindings.
3
+
4
+ These are Python functions that are not directly related to the high-level APIs
5
+ but are necessary to get them to work. They include a whole bunch of low-level
6
+ CoreFoundation messing about and memory management. The concerns in this module
7
+ are almost entirely about trying to avoid memory leaks and providing
8
+ appropriate and useful assistance to the higher-level code.
9
+ """
10
+ import base64
11
+ import ctypes
12
+ import itertools
13
+ import os
14
+ import re
15
+ import ssl
16
+ import struct
17
+ import tempfile
18
+
19
+ from .bindings import CFConst, CoreFoundation, Security
20
+
21
+ # This regular expression is used to grab PEM data out of a PEM bundle.
22
+ _PEM_CERTS_RE = re.compile(
23
+ b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL
24
+ )
25
+
26
+
27
+ def _cf_data_from_bytes(bytestring):
28
+ """
29
+ Given a bytestring, create a CFData object from it. This CFData object must
30
+ be CFReleased by the caller.
31
+ """
32
+ return CoreFoundation.CFDataCreate(
33
+ CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring)
34
+ )
35
+
36
+
37
+ def _cf_dictionary_from_tuples(tuples):
38
+ """
39
+ Given a list of Python tuples, create an associated CFDictionary.
40
+ """
41
+ dictionary_size = len(tuples)
42
+
43
+ # We need to get the dictionary keys and values out in the same order.
44
+ keys = (t[0] for t in tuples)
45
+ values = (t[1] for t in tuples)
46
+ cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys)
47
+ cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values)
48
+
49
+ return CoreFoundation.CFDictionaryCreate(
50
+ CoreFoundation.kCFAllocatorDefault,
51
+ cf_keys,
52
+ cf_values,
53
+ dictionary_size,
54
+ CoreFoundation.kCFTypeDictionaryKeyCallBacks,
55
+ CoreFoundation.kCFTypeDictionaryValueCallBacks,
56
+ )
57
+
58
+
59
+ def _cfstr(py_bstr):
60
+ """
61
+ Given a Python binary data, create a CFString.
62
+ The string must be CFReleased by the caller.
63
+ """
64
+ c_str = ctypes.c_char_p(py_bstr)
65
+ cf_str = CoreFoundation.CFStringCreateWithCString(
66
+ CoreFoundation.kCFAllocatorDefault,
67
+ c_str,
68
+ CFConst.kCFStringEncodingUTF8,
69
+ )
70
+ return cf_str
71
+
72
+
73
+ def _create_cfstring_array(lst):
74
+ """
75
+ Given a list of Python binary data, create an associated CFMutableArray.
76
+ The array must be CFReleased by the caller.
77
+
78
+ Raises an ssl.SSLError on failure.
79
+ """
80
+ cf_arr = None
81
+ try:
82
+ cf_arr = CoreFoundation.CFArrayCreateMutable(
83
+ CoreFoundation.kCFAllocatorDefault,
84
+ 0,
85
+ ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
86
+ )
87
+ if not cf_arr:
88
+ raise MemoryError("Unable to allocate memory!")
89
+ for item in lst:
90
+ cf_str = _cfstr(item)
91
+ if not cf_str:
92
+ raise MemoryError("Unable to allocate memory!")
93
+ try:
94
+ CoreFoundation.CFArrayAppendValue(cf_arr, cf_str)
95
+ finally:
96
+ CoreFoundation.CFRelease(cf_str)
97
+ except BaseException as e:
98
+ if cf_arr:
99
+ CoreFoundation.CFRelease(cf_arr)
100
+ raise ssl.SSLError("Unable to allocate array: %s" % (e,))
101
+ return cf_arr
102
+
103
+
104
+ def _cf_string_to_unicode(value):
105
+ """
106
+ Creates a Unicode string from a CFString object. Used entirely for error
107
+ reporting.
108
+
109
+ Yes, it annoys me quite a lot that this function is this complex.
110
+ """
111
+ value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p))
112
+
113
+ string = CoreFoundation.CFStringGetCStringPtr(
114
+ value_as_void_p, CFConst.kCFStringEncodingUTF8
115
+ )
116
+ if string is None:
117
+ buffer = ctypes.create_string_buffer(1024)
118
+ result = CoreFoundation.CFStringGetCString(
119
+ value_as_void_p, buffer, 1024, CFConst.kCFStringEncodingUTF8
120
+ )
121
+ if not result:
122
+ raise OSError("Error copying C string from CFStringRef")
123
+ string = buffer.value
124
+ if string is not None:
125
+ string = string.decode("utf-8")
126
+ return string
127
+
128
+
129
+ def _assert_no_error(error, exception_class=None):
130
+ """
131
+ Checks the return code and throws an exception if there is an error to
132
+ report
133
+ """
134
+ if error == 0:
135
+ return
136
+
137
+ cf_error_string = Security.SecCopyErrorMessageString(error, None)
138
+ output = _cf_string_to_unicode(cf_error_string)
139
+ CoreFoundation.CFRelease(cf_error_string)
140
+
141
+ if output is None or output == u"":
142
+ output = u"OSStatus %s" % error
143
+
144
+ if exception_class is None:
145
+ exception_class = ssl.SSLError
146
+
147
+ raise exception_class(output)
148
+
149
+
150
def _cert_array_from_pem(pem_bundle):
    """
    Given a bundle of certs in PEM format, turns them into a CFArray of certs
    that can be used to validate a cert chain.

    :param pem_bundle: bytes containing one or more PEM certificates.
    :returns: a CFMutableArrayRef of SecCertificateRefs. On success the
        caller owns the array and must CFRelease it.
    :raises ssl.SSLError: on empty input, allocation failure, or unparsable
        certificate data.
    """
    # Normalize the PEM bundle's line endings.
    pem_bundle = pem_bundle.replace(b"\r\n", b"\n")

    # Extract the base64 body of each PEM block and decode it to DER.
    der_certs = [
        base64.b64decode(match.group(1)) for match in _PEM_CERTS_RE.finditer(pem_bundle)
    ]
    if not der_certs:
        raise ssl.SSLError("No root certificates specified")

    cert_array = CoreFoundation.CFArrayCreateMutable(
        CoreFoundation.kCFAllocatorDefault,
        0,
        ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
    )
    if not cert_array:
        raise ssl.SSLError("Unable to allocate memory!")

    try:
        for der_bytes in der_certs:
            certdata = _cf_data_from_bytes(der_bytes)
            if not certdata:
                raise ssl.SSLError("Unable to allocate memory!")
            cert = Security.SecCertificateCreateWithData(
                CoreFoundation.kCFAllocatorDefault, certdata
            )
            # The array retains the cert on append, so our temporary
            # references can be dropped immediately after use.
            CoreFoundation.CFRelease(certdata)
            if not cert:
                raise ssl.SSLError("Unable to build cert object!")

            CoreFoundation.CFArrayAppendValue(cert_array, cert)
            CoreFoundation.CFRelease(cert)
    except Exception:
        # We need to free the array before the exception bubbles further.
        # We only want to do that if an error occurs: otherwise, the caller
        # should free.
        CoreFoundation.CFRelease(cert_array)
        raise

    return cert_array
194
+
195
+
196
def _is_cert(item):
    """
    Returns True if a given CFTypeRef is a certificate.
    """
    return CoreFoundation.CFGetTypeID(item) == Security.SecCertificateGetTypeID()
202
+
203
+
204
def _is_identity(item):
    """
    Returns True if a given CFTypeRef is an identity.
    """
    return CoreFoundation.CFGetTypeID(item) == Security.SecIdentityGetTypeID()
210
+
211
+
212
def _temporary_keychain():
    """
    This function creates a temporary Mac keychain that we can use to work with
    credentials. This keychain uses a one-time password and a temporary file to
    store the data. We expect to have one keychain per socket. The returned
    SecKeychainRef must be freed by the caller, including calling
    SecKeychainDelete.

    Returns a tuple of the SecKeychainRef and the path to the temporary
    directory that contains it.
    """
    # Unfortunately, SecKeychainCreate requires a path to a keychain. This
    # means we cannot use mkstemp to use a generic temporary file. Instead,
    # we're going to create a temporary directory and a filename to use there.
    # This filename will be 8 random bytes expanded into hexadecimal (base16).
    # We also need some random bytes to password-protect the keychain we're
    # creating, so we ask for 40 random bytes.
    random_bytes = os.urandom(40)
    filename = base64.b16encode(random_bytes[:8]).decode("utf-8")
    password = base64.b16encode(random_bytes[8:])  # Must be valid UTF-8
    tempdirectory = tempfile.mkdtemp()

    keychain_path = os.path.join(tempdirectory, filename).encode("utf-8")

    # We now want to create the keychain itself.
    keychain = Security.SecKeychainRef()
    status = Security.SecKeychainCreate(
        keychain_path, len(password), password, False, None, ctypes.byref(keychain)
    )
    _assert_no_error(status)

    # Having created the keychain, we want to pass it off to the caller.
    return keychain, tempdirectory
245
+
246
+
247
def _load_items_from_file(keychain, path):
    """
    Given a single file, loads all the trust objects from it into arrays and
    the keychain.
    Returns a tuple of lists: the first list is a list of identities, the
    second a list of certs.

    The returned items are CFRetained: the caller is responsible for
    releasing them.

    :param keychain: the SecKeychainRef to import items into.
    :param path: filesystem path of the PEM/DER file to load.
    """
    certificates = []
    identities = []
    result_array = None

    with open(path, "rb") as f:
        raw_filedata = f.read()

    try:
        filedata = CoreFoundation.CFDataCreate(
            CoreFoundation.kCFAllocatorDefault, raw_filedata, len(raw_filedata)
        )
        result_array = CoreFoundation.CFArrayRef()
        result = Security.SecItemImport(
            filedata,  # cert data
            None,  # Filename, leaving it out for now
            None,  # What the type of the file is, we don't care
            None,  # what's in the file, we don't care
            0,  # import flags
            None,  # key params, can include passphrase in the future
            keychain,  # The keychain to insert into
            ctypes.byref(result_array),  # Results
        )
        _assert_no_error(result)

        # A CFArray is not very useful to us as an intermediary
        # representation, so we are going to extract the objects we want
        # and then free the array. We don't need to keep hold of keys: the
        # keychain already has them!
        result_count = CoreFoundation.CFArrayGetCount(result_array)
        for index in range(result_count):
            item = CoreFoundation.CFArrayGetValueAtIndex(result_array, index)
            item = ctypes.cast(item, CoreFoundation.CFTypeRef)

            # Retain each item we keep: releasing result_array below drops
            # the array's own references.
            if _is_cert(item):
                CoreFoundation.CFRetain(item)
                certificates.append(item)
            elif _is_identity(item):
                CoreFoundation.CFRetain(item)
                identities.append(item)
    finally:
        if result_array:
            CoreFoundation.CFRelease(result_array)

        CoreFoundation.CFRelease(filedata)

    return (identities, certificates)
300
+
301
+
302
def _load_client_cert_chain(keychain, *paths):
    """
    Load certificates and maybe keys from a number of files. Has the end goal
    of returning a CFArray containing one SecIdentityRef, and then zero or more
    SecCertificateRef objects, suitable for use as a client certificate trust
    chain.

    :param keychain: the SecKeychainRef used to resolve private keys.
    :param paths: file paths to load; falsy entries are skipped.
    """
    # Ok, the strategy.
    #
    # This relies on knowing that macOS will not give you a SecIdentityRef
    # unless you have imported a key into a keychain. This is a somewhat
    # artificial limitation of macOS (for example, it doesn't necessarily
    # affect iOS), but there is nothing inside Security.framework that lets you
    # get a SecIdentityRef without having a key in a keychain.
    #
    # So the policy here is we take all the files and iterate them in order.
    # Each one will use SecItemImport to have one or more objects loaded from
    # it. We will also point at a keychain that macOS can use to work with the
    # private key.
    #
    # Once we have all the objects, we'll check what we actually have. If we
    # already have a SecIdentityRef in hand, fab: we'll use that. Otherwise,
    # we'll take the first certificate (which we assume to be our leaf) and
    # ask the keychain to give us a SecIdentityRef with that cert's associated
    # key.
    #
    # We'll then return a CFArray containing the trust chain: one
    # SecIdentityRef and then zero-or-more SecCertificateRef objects. The
    # responsibility for freeing this CFArray will be with the caller. This
    # CFArray must remain alive for the entire connection, so in practice it
    # will be stored with a single SSLSocket, along with the reference to the
    # keychain.
    certificates = []
    identities = []

    # Filter out bad paths.
    paths = (path for path in paths if path)

    try:
        for file_path in paths:
            new_identities, new_certs = _load_items_from_file(keychain, file_path)
            identities.extend(new_identities)
            certificates.extend(new_certs)

        # Ok, we have everything. The question is: do we have an identity? If
        # not, we want to grab one from the first cert we have.
        # NOTE(review): this assumes at least one certificate was loaded when
        # no identity is present — certificates[0] raises IndexError
        # otherwise. Verify callers always pass a non-empty cert file.
        if not identities:
            new_identity = Security.SecIdentityRef()
            status = Security.SecIdentityCreateWithCertificate(
                keychain, certificates[0], ctypes.byref(new_identity)
            )
            _assert_no_error(status)
            identities.append(new_identity)

            # We now want to release the original certificate, as we no longer
            # need it.
            CoreFoundation.CFRelease(certificates.pop(0))

        # We now need to build a new CFArray that holds the trust chain.
        trust_chain = CoreFoundation.CFArrayCreateMutable(
            CoreFoundation.kCFAllocatorDefault,
            0,
            ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
        )
        for item in itertools.chain(identities, certificates):
            # ArrayAppendValue does a CFRetain on the item. That's fine,
            # because the finally block will release our other refs to them.
            CoreFoundation.CFArrayAppendValue(trust_chain, item)

        return trust_chain
    finally:
        for obj in itertools.chain(identities, certificates):
            CoreFoundation.CFRelease(obj)
375
+
376
+
377
# Protocol name -> (major, minor) version bytes as carried in TLS record
# headers on the wire (e.g. TLSv1.2 records use version bytes (3, 3)).
TLS_PROTOCOL_VERSIONS = {
    "SSLv2": (0, 2),
    "SSLv3": (3, 0),
    "TLSv1": (3, 1),
    "TLSv1.1": (3, 2),
    "TLSv1.2": (3, 3),
}
384
+
385
+
386
def _build_tls_unknown_ca_alert(version):
    """
    Builds a TLS alert record for an unknown CA.

    :param version: protocol name key into ``TLS_PROTOCOL_VERSIONS``.
    :returns: the serialized alert record as bytes.
    """
    ver_maj, ver_min = TLS_PROTOCOL_VERSIONS[version]
    # Alert payload: severity byte (0x02 = fatal) followed by the
    # description byte (0x30 = unknown_ca).
    severity_fatal = 0x02
    description_unknown_ca = 0x30
    payload = struct.pack(">BB", severity_fatal, description_unknown_ca)
    # Record header: content type 0x15 (alert), protocol version bytes,
    # then the two-byte payload length.
    record_type_alert = 0x15
    header = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, len(payload))
    return header + payload
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/contrib/appengine.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides a pool manager that uses Google App Engine's
3
+ `URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
4
+
5
+ Example usage::
6
+
7
+ from pip._vendor.urllib3 import PoolManager
8
+ from pip._vendor.urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
9
+
10
+ if is_appengine_sandbox():
11
+ # AppEngineManager uses AppEngine's URLFetch API behind the scenes
12
+ http = AppEngineManager()
13
+ else:
14
+ # PoolManager uses a socket-level API behind the scenes
15
+ http = PoolManager()
16
+
17
+ r = http.request('GET', 'https://google.com/')
18
+
19
+ There are `limitations <https://cloud.google.com/appengine/docs/python/\
20
+ urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
21
+ the best choice for your application. There are three options for using
22
+ urllib3 on Google App Engine:
23
+
24
+ 1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
25
+ cost-effective in many circumstances as long as your usage is within the
26
+ limitations.
27
+ 2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
28
+ Sockets also have `limitations and restrictions
29
+ <https://cloud.google.com/appengine/docs/python/sockets/\
30
+ #limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
31
+ To use sockets, be sure to specify the following in your ``app.yaml``::
32
+
33
+ env_variables:
34
+ GAE_USE_SOCKETS_HTTPLIB : 'true'
35
+
36
+ 3. If you are using `App Engine Flexible
37
+ <https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
38
+ :class:`PoolManager` without any configuration or special environment variables.
39
+ """
40
+
41
+ from __future__ import absolute_import
42
+
43
+ import io
44
+ import logging
45
+ import warnings
46
+
47
+ from ..exceptions import (
48
+ HTTPError,
49
+ HTTPWarning,
50
+ MaxRetryError,
51
+ ProtocolError,
52
+ SSLError,
53
+ TimeoutError,
54
+ )
55
+ from ..packages.six.moves.urllib.parse import urljoin
56
+ from ..request import RequestMethods
57
+ from ..response import HTTPResponse
58
+ from ..util.retry import Retry
59
+ from ..util.timeout import Timeout
60
+ from . import _appengine_environ
61
+
62
+ try:
63
+ from google.appengine.api import urlfetch
64
+ except ImportError:
65
+ urlfetch = None
66
+
67
+
68
+ log = logging.getLogger(__name__)
69
+
70
+
71
class AppEnginePlatformWarning(HTTPWarning):
    # Warning category used by AppEngineManager when it has to diverge from
    # normal urllib3 behavior (e.g. URLFetch's coarse timeouts and retries).
    pass
73
+
74
+
75
class AppEnginePlatformError(HTTPError):
    # Raised when a request cannot be performed at all under URLFetch's
    # platform limits (size caps, unsupported methods, missing service).
    pass
77
+
78
+
79
class AppEngineManager(RequestMethods):
    """
    Connection manager for Google App Engine sandbox applications.

    This manager uses the URLFetch service directly instead of using the
    emulated httplib, and is subject to URLFetch limitations as described in
    the App Engine documentation `here
    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.

    Notably it will raise an :class:`AppEnginePlatformError` if:
        * URLFetch is not available.
        * If you attempt to use this on App Engine Flexible, as full socket
          support is available.
        * If a request size is more than 10 megabytes.
        * If a response size is more than 32 megabytes.
        * If you use an unsupported request method such as OPTIONS.

    Beyond those cases, it will raise normal urllib3 errors.
    """

    def __init__(
        self,
        headers=None,
        retries=None,
        validate_certificate=True,
        urlfetch_retries=True,
    ):
        # ``urlfetch`` is None when the google.appengine package could not be
        # imported, i.e. we are not running in a URLFetch-capable environment.
        if not urlfetch:
            raise AppEnginePlatformError(
                "URLFetch is not available in this environment."
            )

        warnings.warn(
            "urllib3 is using URLFetch on Google App Engine sandbox instead "
            "of sockets. To use sockets directly instead of URLFetch see "
            "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
            AppEnginePlatformWarning,
        )

        RequestMethods.__init__(self, headers)
        # Whether URLFetch should validate server certificates.
        self.validate_certificate = validate_certificate
        # When True, redirects are delegated to URLFetch itself; when False,
        # urlopen() follows them manually so Retry accounting applies.
        self.urlfetch_retries = urlfetch_retries

        self.retries = retries or Retry.DEFAULT

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Return False to re-raise any potential exceptions
        return False

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=None,
        redirect=True,
        timeout=Timeout.DEFAULT_TIMEOUT,
        **response_kw
    ):
        # Perform a request via URLFetch, translating its exceptions into
        # urllib3's exception hierarchy and applying Retry semantics.

        retries = self._get_retries(retries, redirect)

        try:
            follow_redirects = redirect and retries.redirect != 0 and retries.total
            response = urlfetch.fetch(
                url,
                payload=body,
                method=method,
                headers=headers or {},
                allow_truncated=False,
                follow_redirects=self.urlfetch_retries and follow_redirects,
                deadline=self._get_absolute_timeout(timeout),
                validate_certificate=self.validate_certificate,
            )
        except urlfetch.DeadlineExceededError as e:
            raise TimeoutError(self, e)

        except urlfetch.InvalidURLError as e:
            if "too large" in str(e):
                raise AppEnginePlatformError(
                    "URLFetch request too large, URLFetch only "
                    "supports requests up to 10mb in size.",
                    e,
                )
            raise ProtocolError(e)

        except urlfetch.DownloadError as e:
            if "Too many redirects" in str(e):
                raise MaxRetryError(self, url, reason=e)
            raise ProtocolError(e)

        except urlfetch.ResponseTooLargeError as e:
            raise AppEnginePlatformError(
                "URLFetch response too large, URLFetch only supports"
                "responses up to 32mb in size.",
                e,
            )

        except urlfetch.SSLCertificateError as e:
            raise SSLError(e)

        except urlfetch.InvalidMethodError as e:
            raise AppEnginePlatformError(
                "URLFetch does not support method: %s" % method, e
            )

        http_response = self._urlfetch_response_to_http_response(
            response, retries=retries, **response_kw
        )

        # Handle redirect?
        redirect_location = redirect and http_response.get_redirect_location()
        if redirect_location:
            # Check for redirect response
            if self.urlfetch_retries and retries.raise_on_redirect:
                raise MaxRetryError(self, url, "too many redirects")
            else:
                if http_response.status == 303:
                    # 303 See Other mandates a GET on the redirected request.
                    method = "GET"

                try:
                    retries = retries.increment(
                        method, url, response=http_response, _pool=self
                    )
                except MaxRetryError:
                    if retries.raise_on_redirect:
                        raise MaxRetryError(self, url, "too many redirects")
                    return http_response

                retries.sleep_for_retry(http_response)
                log.debug("Redirecting %s -> %s", url, redirect_location)
                redirect_url = urljoin(url, redirect_location)
                return self.urlopen(
                    method,
                    redirect_url,
                    body,
                    headers,
                    retries=retries,
                    redirect=redirect,
                    timeout=timeout,
                    **response_kw
                )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(http_response.headers.get("Retry-After"))
        if retries.is_retry(method, http_response.status, has_retry_after):
            retries = retries.increment(method, url, response=http_response, _pool=self)
            log.debug("Retry: %s", url)
            retries.sleep(http_response)
            return self.urlopen(
                method,
                url,
                body=body,
                headers=headers,
                retries=retries,
                redirect=redirect,
                timeout=timeout,
                **response_kw
            )

        return http_response

    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
        # Adapt a urlfetch response object into an urllib3 HTTPResponse.

        if is_prod_appengine():
            # Production GAE handles deflate encoding automatically, but does
            # not remove the encoding header.
            content_encoding = urlfetch_resp.headers.get("content-encoding")

            if content_encoding == "deflate":
                del urlfetch_resp.headers["content-encoding"]

        transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
        # We have a full response's content,
        # so let's make sure we don't report ourselves as chunked data.
        if transfer_encoding == "chunked":
            encodings = transfer_encoding.split(",")
            encodings.remove("chunked")
            urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings)

        original_response = HTTPResponse(
            # In order for decoding to work, we must present the content as
            # a file-like object.
            body=io.BytesIO(urlfetch_resp.content),
            msg=urlfetch_resp.header_msg,
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            **response_kw
        )

        return HTTPResponse(
            body=io.BytesIO(urlfetch_resp.content),
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            original_response=original_response,
            **response_kw
        )

    def _get_absolute_timeout(self, timeout):
        # Collapse an urllib3 Timeout into the single deadline URLFetch takes.
        if timeout is Timeout.DEFAULT_TIMEOUT:
            return None  # Defer to URLFetch's default.
        if isinstance(timeout, Timeout):
            if timeout._read is not None or timeout._connect is not None:
                warnings.warn(
                    "URLFetch does not support granular timeout settings, "
                    "reverting to total or default URLFetch timeout.",
                    AppEnginePlatformWarning,
                )
            return timeout.total
        return timeout

    def _get_retries(self, retries, redirect):
        # Normalize the retries argument into a Retry instance and warn about
        # Retry features URLFetch cannot honor.
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if retries.connect or retries.read or retries.redirect:
            warnings.warn(
                "URLFetch only supports total retries and does not "
                "recognize connect, read, or redirect retry parameters.",
                AppEnginePlatformWarning,
            )

        return retries
306
+
307
+
308
# Alias methods from _appengine_environ to maintain public API interface.
# These were historically defined in this module; re-exporting keeps old
# ``from ...contrib.appengine import is_appengine``-style imports working.

is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/request.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ from base64 import b64encode
4
+
5
+ from ..exceptions import UnrewindableBodyError
6
+ from ..packages.six import b, integer_types
7
+
8
# Pass as a value within ``headers`` to skip
# emitting some HTTP headers that are added automatically.
# The only headers that are supported are ``Accept-Encoding``,
# ``Host``, and ``User-Agent``.
SKIP_HEADER = "@@@SKIP_HEADER@@@"
SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"])

# Default value used by make_headers() when accept_encoding=True.
ACCEPT_ENCODING = "gzip,deflate"

# Sentinel recorded by set_file_position() when body.tell() fails, so
# rewind_body() can distinguish "tell failed" from "no position recorded".
_FAILEDTELL = object()
18
+
19
+
20
def make_headers(
    keep_alive=None,
    accept_encoding=None,
    user_agent=None,
    basic_auth=None,
    proxy_basic_auth=None,
    disable_cache=None,
):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if accept_encoding:
        # A list is joined with commas, any other non-string truthy value
        # falls back to the default, and a string is used as provided.
        if isinstance(accept_encoding, list):
            accept_encoding = ",".join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            accept_encoding = ACCEPT_ENCODING
        headers["accept-encoding"] = accept_encoding

    if user_agent:
        headers["user-agent"] = user_agent

    if keep_alive:
        headers["connection"] = "keep-alive"

    if basic_auth:
        headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8")

    if proxy_basic_auth:
        headers["proxy-authorization"] = "Basic " + b64encode(
            b(proxy_basic_auth)
        ).decode("utf-8")

    if disable_cache:
        headers["cache-control"] = "no-cache"

    return headers
90
+
91
+
92
def set_file_position(body, pos):
    """
    If a position is provided, move file to that point.
    Otherwise, we'll attempt to record a position for future use.
    """
    if pos is not None:
        # An explicit position was supplied: seek the body there immediately.
        rewind_body(body, pos)
        return pos

    tell = getattr(body, "tell", None)
    if tell is None:
        # Body doesn't expose tell(); nothing to record.
        return None

    try:
        return tell()
    except (IOError, OSError):
        # This differentiates from None, allowing us to catch
        # a failed `tell()` later when trying to rewind the body.
        return _FAILEDTELL
108
+
109
+
110
def rewind_body(body, body_pos):
    """
    Attempt to rewind body to a certain position.
    Primarily used for request redirects and retries.

    :param body:
        File-like object that supports seek.

    :param int pos:
        Position to seek to in file.

    :raises UnrewindableBodyError:
        If the body cannot be rewound (seek failed, or the earlier tell()
        failed so no position is known).
    :raises ValueError:
        If ``body_pos`` is neither an integer nor the failed-tell sentinel.
    """
    seek = getattr(body, "seek", None)

    if seek is not None and isinstance(body_pos, integer_types):
        try:
            seek(body_pos)
        except (IOError, OSError):
            raise UnrewindableBodyError(
                "An error occurred when rewinding request body for redirect/retry."
            )
    elif body_pos is _FAILEDTELL:
        # tell() failed earlier, so we never learned where the body started.
        raise UnrewindableBodyError(
            "Unable to record file position for rewinding "
            "request body during a redirect/retry."
        )
    else:
        raise ValueError(
            "body_pos must be of type integer, instead it was %s." % type(body_pos)
        )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """The match_hostname() function from Python 3.3.3, essential when using SSL."""
2
+
3
+ # Note: This file is under the PSF license as the code comes from the python
4
+ # stdlib. http://docs.python.org/3/license.html
5
+
6
+ import re
7
+ import sys
8
+
9
+ # ipaddress has been backported to 2.6+ in pypi. If it is installed on the
10
+ # system, use it to handle IPAddress ServerAltnames (this was added in
11
+ # python-3.5) otherwise only do DNS matching. This allows
12
+ # util.ssl_match_hostname to continue to be used in Python 2.7.
13
+ try:
14
+ import ipaddress
15
+ except ImportError:
16
+ ipaddress = None
17
+
18
+ __version__ = "3.5.0.1"
19
+
20
+
21
class CertificateError(ValueError):
    # Raised by match_hostname() / _dnsname_match() when a certificate does
    # not match the expected hostname or contains an invalid DNS pattern.
    pass
23
+
24
+
25
+ def _dnsname_match(dn, hostname, max_wildcards=1):
26
+ """Matching according to RFC 6125, section 6.4.3
27
+
28
+ http://tools.ietf.org/html/rfc6125#section-6.4.3
29
+ """
30
+ pats = []
31
+ if not dn:
32
+ return False
33
+
34
+ # Ported from python3-syntax:
35
+ # leftmost, *remainder = dn.split(r'.')
36
+ parts = dn.split(r".")
37
+ leftmost = parts[0]
38
+ remainder = parts[1:]
39
+
40
+ wildcards = leftmost.count("*")
41
+ if wildcards > max_wildcards:
42
+ # Issue #17980: avoid denials of service by refusing more
43
+ # than one wildcard per fragment. A survey of established
44
+ # policy among SSL implementations showed it to be a
45
+ # reasonable choice.
46
+ raise CertificateError(
47
+ "too many wildcards in certificate DNS name: " + repr(dn)
48
+ )
49
+
50
+ # speed up common case w/o wildcards
51
+ if not wildcards:
52
+ return dn.lower() == hostname.lower()
53
+
54
+ # RFC 6125, section 6.4.3, subitem 1.
55
+ # The client SHOULD NOT attempt to match a presented identifier in which
56
+ # the wildcard character comprises a label other than the left-most label.
57
+ if leftmost == "*":
58
+ # When '*' is a fragment by itself, it matches a non-empty dotless
59
+ # fragment.
60
+ pats.append("[^.]+")
61
+ elif leftmost.startswith("xn--") or hostname.startswith("xn--"):
62
+ # RFC 6125, section 6.4.3, subitem 3.
63
+ # The client SHOULD NOT attempt to match a presented identifier
64
+ # where the wildcard character is embedded within an A-label or
65
+ # U-label of an internationalized domain name.
66
+ pats.append(re.escape(leftmost))
67
+ else:
68
+ # Otherwise, '*' matches any dotless string, e.g. www*
69
+ pats.append(re.escape(leftmost).replace(r"\*", "[^.]*"))
70
+
71
+ # add the remaining fragments, ignore any wildcards
72
+ for frag in remainder:
73
+ pats.append(re.escape(frag))
74
+
75
+ pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE)
76
+ return pat.match(hostname)
77
+
78
+
79
+ def _to_unicode(obj):
80
+ if isinstance(obj, str) and sys.version_info < (3,):
81
+ # ignored flake8 # F821 to support python 2.7 function
82
+ obj = unicode(obj, encoding="ascii", errors="strict") # noqa: F821
83
+ return obj
84
+
85
+
86
def _ipaddress_match(ipname, host_ip):
    """Exact matching of IP addresses.

    RFC 6125 explicitly doesn't define an algorithm for this
    (section 1.7.2 - "Out of Scope").
    """
    # OpenSSL may add a trailing newline to a subjectAltName's IP address.
    # Divergence from upstream: ipaddress can't handle byte str, so coerce
    # to unicode first.
    candidate = ipaddress.ip_address(_to_unicode(ipname).rstrip())
    return candidate == host_ip
96
+
97
+
98
def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
    rules are followed, but IP addresses are not accepted for *hostname*.

    CertificateError is raised on failure. On success, the function
    returns nothing.
    """
    if not cert:
        raise ValueError(
            "empty or no certificate, match_hostname needs a "
            "SSL socket or SSL context with either "
            "CERT_OPTIONAL or CERT_REQUIRED"
        )
    try:
        # Divergence from upstream: ipaddress can't handle byte str
        host_ip = ipaddress.ip_address(_to_unicode(hostname))
    except (UnicodeError, ValueError):
        # ValueError: Not an IP address (common case)
        # UnicodeError: Divergence from upstream: Have to deal with ipaddress not taking
        # byte strings. addresses should be all ascii, so we consider it not
        # an ipaddress in this case
        host_ip = None
    except AttributeError:
        # Divergence from upstream: Make ipaddress library optional
        if ipaddress is None:
            host_ip = None
        else:  # Defensive
            raise
    dnsnames = []
    # Prefer subjectAltName entries; DNS names are matched only when the
    # hostname is not an IP, and IP entries only when it is.
    san = cert.get("subjectAltName", ())
    for key, value in san:
        if key == "DNS":
            if host_ip is None and _dnsname_match(value, hostname):
                return
            dnsnames.append(value)
        elif key == "IP Address":
            if host_ip is not None and _ipaddress_match(value, host_ip):
                return
            dnsnames.append(value)
    if not dnsnames:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for sub in cert.get("subject", ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == "commonName":
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)
    if len(dnsnames) > 1:
        raise CertificateError(
            "hostname %r "
            "doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames)))
        )
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0]))
    else:
        raise CertificateError(
            "no appropriate commonName or subjectAltName fields were found"
        )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/urllib3/util/ssltransport.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import socket
3
+ import ssl
4
+
5
+ from ..exceptions import ProxySchemeUnsupported
6
+ from ..packages import six
7
+
8
+ SSL_BLOCKSIZE = 16384
9
+
10
+
11
+ class SSLTransport:
12
+ """
13
+ The SSLTransport wraps an existing socket and establishes an SSL connection.
14
+
15
+ Contrary to Python's implementation of SSLSocket, it allows you to chain
16
+ multiple TLS connections together. It's particularly useful if you need to
17
+ implement TLS within TLS.
18
+
19
+ The class supports most of the socket API operations.
20
+ """
21
+
22
+ @staticmethod
23
+ def _validate_ssl_context_for_tls_in_tls(ssl_context):
24
+ """
25
+ Raises a ProxySchemeUnsupported if the provided ssl_context can't be used
26
+ for TLS in TLS.
27
+
28
+ The only requirement is that the ssl_context provides the 'wrap_bio'
29
+ methods.
30
+ """
31
+
32
+ if not hasattr(ssl_context, "wrap_bio"):
33
+ if six.PY2:
34
+ raise ProxySchemeUnsupported(
35
+ "TLS in TLS requires SSLContext.wrap_bio() which isn't "
36
+ "supported on Python 2"
37
+ )
38
+ else:
39
+ raise ProxySchemeUnsupported(
40
+ "TLS in TLS requires SSLContext.wrap_bio() which isn't "
41
+ "available on non-native SSLContext"
42
+ )
43
+
44
+ def __init__(
45
+ self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True
46
+ ):
47
+ """
48
+ Create an SSLTransport around socket using the provided ssl_context.
49
+ """
50
+ self.incoming = ssl.MemoryBIO()
51
+ self.outgoing = ssl.MemoryBIO()
52
+
53
+ self.suppress_ragged_eofs = suppress_ragged_eofs
54
+ self.socket = socket
55
+
56
+ self.sslobj = ssl_context.wrap_bio(
57
+ self.incoming, self.outgoing, server_hostname=server_hostname
58
+ )
59
+
60
+ # Perform initial handshake.
61
+ self._ssl_io_loop(self.sslobj.do_handshake)
62
+
63
+ def __enter__(self):
64
+ return self
65
+
66
+ def __exit__(self, *_):
67
+ self.close()
68
+
69
+ def fileno(self):
70
+ return self.socket.fileno()
71
+
72
+ def read(self, len=1024, buffer=None):
73
+ return self._wrap_ssl_read(len, buffer)
74
+
75
+ def recv(self, len=1024, flags=0):
76
+ if flags != 0:
77
+ raise ValueError("non-zero flags not allowed in calls to recv")
78
+ return self._wrap_ssl_read(len)
79
+
80
+ def recv_into(self, buffer, nbytes=None, flags=0):
81
+ if flags != 0:
82
+ raise ValueError("non-zero flags not allowed in calls to recv_into")
83
+ if buffer and (nbytes is None):
84
+ nbytes = len(buffer)
85
+ elif nbytes is None:
86
+ nbytes = 1024
87
+ return self.read(nbytes, buffer)
88
+
89
+ def sendall(self, data, flags=0):
90
+ if flags != 0:
91
+ raise ValueError("non-zero flags not allowed in calls to sendall")
92
+ count = 0
93
+ with memoryview(data) as view, view.cast("B") as byte_view:
94
+ amount = len(byte_view)
95
+ while count < amount:
96
+ v = self.send(byte_view[count:])
97
+ count += v
98
+
99
+ def send(self, data, flags=0):
100
+ if flags != 0:
101
+ raise ValueError("non-zero flags not allowed in calls to send")
102
+ response = self._ssl_io_loop(self.sslobj.write, data)
103
+ return response
104
+
105
+ def makefile(
106
+ self, mode="r", buffering=None, encoding=None, errors=None, newline=None
107
+ ):
108
+ """
109
+ Python's httpclient uses makefile and buffered io when reading HTTP
110
+ messages and we need to support it.
111
+
112
+ This is unfortunately a copy and paste of socket.py makefile with small
113
+ changes to point to the socket directly.
114
+ """
115
+ if not set(mode) <= {"r", "w", "b"}:
116
+ raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,))
117
+
118
+ writing = "w" in mode
119
+ reading = "r" in mode or not writing
120
+ assert reading or writing
121
+ binary = "b" in mode
122
+ rawmode = ""
123
+ if reading:
124
+ rawmode += "r"
125
+ if writing:
126
+ rawmode += "w"
127
+ raw = socket.SocketIO(self, rawmode)
128
+ self.socket._io_refs += 1
129
+ if buffering is None:
130
+ buffering = -1
131
+ if buffering < 0:
132
+ buffering = io.DEFAULT_BUFFER_SIZE
133
+ if buffering == 0:
134
+ if not binary:
135
+ raise ValueError("unbuffered streams must be binary")
136
+ return raw
137
+ if reading and writing:
138
+ buffer = io.BufferedRWPair(raw, raw, buffering)
139
+ elif reading:
140
+ buffer = io.BufferedReader(raw, buffering)
141
+ else:
142
+ assert writing
143
+ buffer = io.BufferedWriter(raw, buffering)
144
+ if binary:
145
+ return buffer
146
+ text = io.TextIOWrapper(buffer, encoding, errors, newline)
147
+ text.mode = mode
148
+ return text
149
+
150
+ def unwrap(self):
151
+ self._ssl_io_loop(self.sslobj.unwrap)
152
+
153
+ def close(self):
154
+ self.socket.close()
155
+
156
+ def getpeercert(self, binary_form=False):
157
+ return self.sslobj.getpeercert(binary_form)
158
+
159
+ def version(self):
160
+ return self.sslobj.version()
161
+
162
+ def cipher(self):
163
+ return self.sslobj.cipher()
164
+
165
+ def selected_alpn_protocol(self):
166
+ return self.sslobj.selected_alpn_protocol()
167
+
168
+ def selected_npn_protocol(self):
169
+ return self.sslobj.selected_npn_protocol()
170
+
171
+ def shared_ciphers(self):
172
+ return self.sslobj.shared_ciphers()
173
+
174
+ def compression(self):
175
+ return self.sslobj.compression()
176
+
177
+ def settimeout(self, value):
178
+ self.socket.settimeout(value)
179
+
180
+ def gettimeout(self):
181
+ return self.socket.gettimeout()
182
+
183
+ def _decref_socketios(self):
184
+ self.socket._decref_socketios()
185
+
186
+ def _wrap_ssl_read(self, len, buffer=None):
187
+ try:
188
+ return self._ssl_io_loop(self.sslobj.read, len, buffer)
189
+ except ssl.SSLError as e:
190
+ if e.errno == ssl.SSL_ERROR_EOF and self.suppress_ragged_eofs:
191
+ return 0 # eof, return 0.
192
+ else:
193
+ raise
194
+
195
+ def _ssl_io_loop(self, func, *args):
196
+ """Performs an I/O loop between incoming/outgoing and the socket."""
197
+ should_loop = True
198
+ ret = None
199
+
200
+ while should_loop:
201
+ errno = None
202
+ try:
203
+ ret = func(*args)
204
+ except ssl.SSLError as e:
205
+ if e.errno not in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE):
206
+ # WANT_READ, and WANT_WRITE are expected, others are not.
207
+ raise e
208
+ errno = e.errno
209
+
210
+ buf = self.outgoing.read()
211
+ self.socket.sendall(buf)
212
+
213
+ if errno is None:
214
+ should_loop = False
215
+ elif errno == ssl.SSL_ERROR_WANT_READ:
216
+ buf = self.socket.recv(SSL_BLOCKSIZE)
217
+ if buf:
218
+ self.incoming.write(buf)
219
+ else:
220
+ self.incoming.write_eof()
221
+ return ret
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_torch_docs.py ADDED
The diff for this file is too large to render. See raw diff
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (722 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/modules/__pycache__/conv_fused.cpython-311.pyc ADDED
Binary file (32.4 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (421 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import torch
2
+ from .linear_relu import LinearReLU
3
+
4
+ __all__ = [
5
+ 'LinearReLU',
6
+ ]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (726 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__pycache__/conv_relu.cpython-311.pyc ADDED
Binary file (10.5 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .modules import * # noqa: F403
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (256 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (308 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__pycache__/linear.cpython-311.pyc ADDED
Binary file (4.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .modules import * # noqa: F403
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (443 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc ADDED
Binary file (23.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc ADDED
Binary file (23.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/rnn.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numbers
2
+ from typing import Optional, Tuple
3
+ import warnings
4
+
5
+ import torch
6
+ from torch import Tensor
7
+
8
+ """
9
+ We will recreate all the RNN modules as we require the modules to be decomposed
10
+ into its building blocks to be able to observe.
11
+ """
12
+
13
+ __all__ = [
14
+ "LSTMCell",
15
+ "LSTM"
16
+ ]
17
+
18
+ class LSTMCell(torch.nn.Module):
19
+ r"""A quantizable long short-term memory (LSTM) cell.
20
+
21
+ For the description and the argument types, please, refer to :class:`~torch.nn.LSTMCell`
22
+
23
+ Examples::
24
+
25
+ >>> import torch.ao.nn.quantizable as nnqa
26
+ >>> rnn = nnqa.LSTMCell(10, 20)
27
+ >>> input = torch.randn(6, 10)
28
+ >>> hx = torch.randn(3, 20)
29
+ >>> cx = torch.randn(3, 20)
30
+ >>> output = []
31
+ >>> for i in range(6):
32
+ ... hx, cx = rnn(input[i], (hx, cx))
33
+ ... output.append(hx)
34
+ """
35
+ _FLOAT_MODULE = torch.nn.LSTMCell
36
+
37
+ def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True,
38
+ device=None, dtype=None) -> None:
39
+ factory_kwargs = {'device': device, 'dtype': dtype}
40
+ super().__init__()
41
+ self.input_size = input_dim
42
+ self.hidden_size = hidden_dim
43
+ self.bias = bias
44
+
45
+ self.igates = torch.nn.Linear(input_dim, 4 * hidden_dim, bias=bias, **factory_kwargs)
46
+ self.hgates = torch.nn.Linear(hidden_dim, 4 * hidden_dim, bias=bias, **factory_kwargs)
47
+ self.gates = torch.ao.nn.quantized.FloatFunctional()
48
+
49
+ self.input_gate = torch.nn.Sigmoid()
50
+ self.forget_gate = torch.nn.Sigmoid()
51
+ self.cell_gate = torch.nn.Tanh()
52
+ self.output_gate = torch.nn.Sigmoid()
53
+
54
+ self.fgate_cx = torch.ao.nn.quantized.FloatFunctional()
55
+ self.igate_cgate = torch.ao.nn.quantized.FloatFunctional()
56
+ self.fgate_cx_igate_cgate = torch.ao.nn.quantized.FloatFunctional()
57
+
58
+ self.ogate_cy = torch.ao.nn.quantized.FloatFunctional()
59
+
60
+ self.initial_hidden_state_qparams: Tuple[float, int] = (1.0, 0)
61
+ self.initial_cell_state_qparams: Tuple[float, int] = (1.0, 0)
62
+ self.hidden_state_dtype: torch.dtype = torch.quint8
63
+ self.cell_state_dtype: torch.dtype = torch.quint8
64
+
65
+ def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
66
+ if hidden is None or hidden[0] is None or hidden[1] is None:
67
+ hidden = self.initialize_hidden(x.shape[0], x.is_quantized)
68
+ hx, cx = hidden
69
+
70
+ igates = self.igates(x)
71
+ hgates = self.hgates(hx)
72
+ gates = self.gates.add(igates, hgates)
73
+
74
+ input_gate, forget_gate, cell_gate, out_gate = gates.chunk(4, 1)
75
+
76
+ input_gate = self.input_gate(input_gate)
77
+ forget_gate = self.forget_gate(forget_gate)
78
+ cell_gate = self.cell_gate(cell_gate)
79
+ out_gate = self.output_gate(out_gate)
80
+
81
+ fgate_cx = self.fgate_cx.mul(forget_gate, cx)
82
+ igate_cgate = self.igate_cgate.mul(input_gate, cell_gate)
83
+ fgate_cx_igate_cgate = self.fgate_cx_igate_cgate.add(fgate_cx, igate_cgate)
84
+ cy = fgate_cx_igate_cgate
85
+
86
+ # TODO: make this tanh a member of the module so its qparams can be configured
87
+ tanh_cy = torch.tanh(cy)
88
+ hy = self.ogate_cy.mul(out_gate, tanh_cy)
89
+ return hy, cy
90
+
91
+ def initialize_hidden(self, batch_size: int, is_quantized: bool = False) -> Tuple[Tensor, Tensor]:
92
+ h, c = torch.zeros((batch_size, self.hidden_size)), torch.zeros((batch_size, self.hidden_size))
93
+ if is_quantized:
94
+ (h_scale, h_zp) = self.initial_hidden_state_qparams
95
+ (c_scale, c_zp) = self.initial_cell_state_qparams
96
+ h = torch.quantize_per_tensor(h, scale=h_scale, zero_point=h_zp, dtype=self.hidden_state_dtype)
97
+ c = torch.quantize_per_tensor(c, scale=c_scale, zero_point=c_zp, dtype=self.cell_state_dtype)
98
+ return h, c
99
+
100
+ def _get_name(self):
101
+ return 'QuantizableLSTMCell'
102
+
103
+ @classmethod
104
+ def from_params(cls, wi, wh, bi=None, bh=None):
105
+ """Uses the weights and biases to create a new LSTM cell.
106
+
107
+ Args:
108
+ wi, wh: Weights for the input and hidden layers
109
+ bi, bh: Biases for the input and hidden layers
110
+ """
111
+ assert (bi is None) == (bh is None) # Either both None or both have values
112
+ input_size = wi.shape[1]
113
+ hidden_size = wh.shape[1]
114
+ cell = cls(input_dim=input_size, hidden_dim=hidden_size,
115
+ bias=(bi is not None))
116
+ cell.igates.weight = torch.nn.Parameter(wi)
117
+ if bi is not None:
118
+ cell.igates.bias = torch.nn.Parameter(bi)
119
+ cell.hgates.weight = torch.nn.Parameter(wh)
120
+ if bh is not None:
121
+ cell.hgates.bias = torch.nn.Parameter(bh)
122
+ return cell
123
+
124
+ @classmethod
125
+ def from_float(cls, other):
126
+ assert type(other) == cls._FLOAT_MODULE
127
+ assert hasattr(other, 'qconfig'), "The float module must have 'qconfig'"
128
+ observed = cls.from_params(other.weight_ih, other.weight_hh,
129
+ other.bias_ih, other.bias_hh)
130
+ observed.qconfig = other.qconfig
131
+ observed.igates.qconfig = other.qconfig
132
+ observed.hgates.qconfig = other.qconfig
133
+ return observed
134
+
135
+
136
+ class _LSTMSingleLayer(torch.nn.Module):
137
+ r"""A single one-directional LSTM layer.
138
+
139
+ The difference between a layer and a cell is that the layer can process a
140
+ sequence, while the cell only expects an instantaneous value.
141
+ """
142
+ def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True,
143
+ device=None, dtype=None) -> None:
144
+ factory_kwargs = {'device': device, 'dtype': dtype}
145
+ super().__init__()
146
+ self.cell = LSTMCell(input_dim, hidden_dim, bias=bias, **factory_kwargs)
147
+
148
+ def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None):
149
+ result = []
150
+ seq_len = x.shape[0]
151
+ for i in range(seq_len):
152
+ hidden = self.cell(x[i], hidden)
153
+ result.append(hidden[0]) # type: ignore[index]
154
+ result_tensor = torch.stack(result, 0)
155
+ return result_tensor, hidden
156
+
157
+ @classmethod
158
+ def from_params(cls, *args, **kwargs):
159
+ cell = LSTMCell.from_params(*args, **kwargs)
160
+ layer = cls(cell.input_size, cell.hidden_size, cell.bias)
161
+ layer.cell = cell
162
+ return layer
163
+
164
+
165
+ class _LSTMLayer(torch.nn.Module):
166
+ r"""A single bi-directional LSTM layer."""
167
+ def __init__(self, input_dim: int, hidden_dim: int, bias: bool = True,
168
+ batch_first: bool = False, bidirectional: bool = False,
169
+ device=None, dtype=None) -> None:
170
+ factory_kwargs = {'device': device, 'dtype': dtype}
171
+ super().__init__()
172
+ self.batch_first = batch_first
173
+ self.bidirectional = bidirectional
174
+ self.layer_fw = _LSTMSingleLayer(input_dim, hidden_dim, bias=bias, **factory_kwargs)
175
+ if self.bidirectional:
176
+ self.layer_bw = _LSTMSingleLayer(input_dim, hidden_dim, bias=bias, **factory_kwargs)
177
+
178
+ def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None):
179
+ if self.batch_first:
180
+ x = x.transpose(0, 1)
181
+ if hidden is None:
182
+ hx_fw, cx_fw = (None, None)
183
+ else:
184
+ hx_fw, cx_fw = hidden
185
+ hidden_bw: Optional[Tuple[Tensor, Tensor]] = None
186
+ if self.bidirectional:
187
+ if hx_fw is None:
188
+ hx_bw = None
189
+ else:
190
+ hx_bw = hx_fw[1]
191
+ hx_fw = hx_fw[0]
192
+ if cx_fw is None:
193
+ cx_bw = None
194
+ else:
195
+ cx_bw = cx_fw[1]
196
+ cx_fw = cx_fw[0]
197
+ if hx_bw is not None and cx_bw is not None:
198
+ hidden_bw = hx_bw, cx_bw
199
+ if hx_fw is None and cx_fw is None:
200
+ hidden_fw = None
201
+ else:
202
+ hidden_fw = torch.jit._unwrap_optional(hx_fw), torch.jit._unwrap_optional(cx_fw)
203
+ result_fw, hidden_fw = self.layer_fw(x, hidden_fw)
204
+
205
+ if hasattr(self, 'layer_bw') and self.bidirectional:
206
+ x_reversed = x.flip(0)
207
+ result_bw, hidden_bw = self.layer_bw(x_reversed, hidden_bw)
208
+ result_bw = result_bw.flip(0)
209
+
210
+ result = torch.cat([result_fw, result_bw], result_fw.dim() - 1)
211
+ if hidden_fw is None and hidden_bw is None:
212
+ h = None
213
+ c = None
214
+ elif hidden_fw is None:
215
+ (h, c) = torch.jit._unwrap_optional(hidden_bw)
216
+ elif hidden_bw is None:
217
+ (h, c) = torch.jit._unwrap_optional(hidden_fw)
218
+ else:
219
+ h = torch.stack([hidden_fw[0], hidden_bw[0]], 0) # type: ignore[list-item]
220
+ c = torch.stack([hidden_fw[1], hidden_bw[1]], 0) # type: ignore[list-item]
221
+ else:
222
+ result = result_fw
223
+ h, c = torch.jit._unwrap_optional(hidden_fw) # type: ignore[assignment]
224
+
225
+ if self.batch_first:
226
+ result.transpose_(0, 1)
227
+
228
+ return result, (h, c)
229
+
230
+ @classmethod
231
+ def from_float(cls, other, layer_idx=0, qconfig=None, **kwargs):
232
+ r"""
233
+ There is no FP equivalent of this class. This function is here just to
234
+ mimic the behavior of the `prepare` within the `torch.ao.quantization`
235
+ flow.
236
+ """
237
+ assert hasattr(other, 'qconfig') or (qconfig is not None)
238
+
239
+ input_size = kwargs.get('input_size', other.input_size)
240
+ hidden_size = kwargs.get('hidden_size', other.hidden_size)
241
+ bias = kwargs.get('bias', other.bias)
242
+ batch_first = kwargs.get('batch_first', other.batch_first)
243
+ bidirectional = kwargs.get('bidirectional', other.bidirectional)
244
+
245
+ layer = cls(input_size, hidden_size, bias, batch_first, bidirectional)
246
+ layer.qconfig = getattr(other, 'qconfig', qconfig)
247
+ wi = getattr(other, f'weight_ih_l{layer_idx}')
248
+ wh = getattr(other, f'weight_hh_l{layer_idx}')
249
+ bi = getattr(other, f'bias_ih_l{layer_idx}', None)
250
+ bh = getattr(other, f'bias_hh_l{layer_idx}', None)
251
+
252
+ layer.layer_fw = _LSTMSingleLayer.from_params(wi, wh, bi, bh)
253
+
254
+ if other.bidirectional:
255
+ wi = getattr(other, f'weight_ih_l{layer_idx}_reverse')
256
+ wh = getattr(other, f'weight_hh_l{layer_idx}_reverse')
257
+ bi = getattr(other, f'bias_ih_l{layer_idx}_reverse', None)
258
+ bh = getattr(other, f'bias_hh_l{layer_idx}_reverse', None)
259
+ layer.layer_bw = _LSTMSingleLayer.from_params(wi, wh, bi, bh)
260
+ return layer
261
+
262
+
263
+ class LSTM(torch.nn.Module):
264
+ r"""A quantizable long short-term memory (LSTM).
265
+
266
+ For the description and the argument types, please, refer to :class:`~torch.nn.LSTM`
267
+
268
+ Attributes:
269
+ layers : instances of the `_LSTMLayer`
270
+
271
+ .. note::
272
+ To access the weights and biases, you need to access them per layer.
273
+ See examples below.
274
+
275
+ Examples::
276
+
277
+ >>> import torch.ao.nn.quantizable as nnqa
278
+ >>> rnn = nnqa.LSTM(10, 20, 2)
279
+ >>> input = torch.randn(5, 3, 10)
280
+ >>> h0 = torch.randn(2, 3, 20)
281
+ >>> c0 = torch.randn(2, 3, 20)
282
+ >>> output, (hn, cn) = rnn(input, (h0, c0))
283
+ >>> # To get the weights:
284
+ >>> # xdoctest: +SKIP
285
+ >>> print(rnn.layers[0].weight_ih)
286
+ tensor([[...]])
287
+ >>> print(rnn.layers[0].weight_hh)
288
+ AssertionError: There is no reverse path in the non-bidirectional layer
289
+ """
290
+ _FLOAT_MODULE = torch.nn.LSTM
291
+
292
+ def __init__(self, input_size: int, hidden_size: int,
293
+ num_layers: int = 1, bias: bool = True,
294
+ batch_first: bool = False, dropout: float = 0.,
295
+ bidirectional: bool = False,
296
+ device=None, dtype=None) -> None:
297
+ factory_kwargs = {'device': device, 'dtype': dtype}
298
+ super().__init__()
299
+ self.input_size = input_size
300
+ self.hidden_size = hidden_size
301
+ self.num_layers = num_layers
302
+ self.bias = bias
303
+ self.batch_first = batch_first
304
+ self.dropout = float(dropout)
305
+ self.bidirectional = bidirectional
306
+ self.training = False # Default to eval mode. If we want to train, we will explicitly set to training.
307
+ num_directions = 2 if bidirectional else 1
308
+
309
+ if not isinstance(dropout, numbers.Number) or not 0 <= dropout <= 1 or \
310
+ isinstance(dropout, bool):
311
+ raise ValueError("dropout should be a number in range [0, 1] "
312
+ "representing the probability of an element being "
313
+ "zeroed")
314
+ if dropout > 0:
315
+ warnings.warn("dropout option for quantizable LSTM is ignored. "
316
+ "If you are training, please, use nn.LSTM version "
317
+ "followed by `prepare` step.")
318
+ if num_layers == 1:
319
+ warnings.warn("dropout option adds dropout after all but last "
320
+ "recurrent layer, so non-zero dropout expects "
321
+ f"num_layers greater than 1, but got dropout={dropout} "
322
+ f"and num_layers={num_layers}")
323
+
324
+ layers = [_LSTMLayer(self.input_size, self.hidden_size,
325
+ self.bias, batch_first=False,
326
+ bidirectional=self.bidirectional, **factory_kwargs)]
327
+ for layer in range(1, num_layers):
328
+ layers.append(_LSTMLayer(self.hidden_size, self.hidden_size,
329
+ self.bias, batch_first=False,
330
+ bidirectional=self.bidirectional,
331
+ **factory_kwargs))
332
+ self.layers = torch.nn.ModuleList(layers)
333
+
334
+ def forward(self, x: Tensor, hidden: Optional[Tuple[Tensor, Tensor]] = None):
335
+ if self.batch_first:
336
+ x = x.transpose(0, 1)
337
+
338
+ max_batch_size = x.size(1)
339
+ num_directions = 2 if self.bidirectional else 1
340
+ if hidden is None:
341
+ zeros = torch.zeros(num_directions, max_batch_size,
342
+ self.hidden_size, dtype=torch.float,
343
+ device=x.device)
344
+ zeros.squeeze_(0)
345
+ if x.is_quantized:
346
+ zeros = torch.quantize_per_tensor(zeros, scale=1.0,
347
+ zero_point=0, dtype=x.dtype)
348
+ hxcx = [(zeros, zeros) for _ in range(self.num_layers)]
349
+ else:
350
+ hidden_non_opt = torch.jit._unwrap_optional(hidden)
351
+ if isinstance(hidden_non_opt[0], Tensor):
352
+ hx = hidden_non_opt[0].reshape(self.num_layers, num_directions,
353
+ max_batch_size,
354
+ self.hidden_size)
355
+ cx = hidden_non_opt[1].reshape(self.num_layers, num_directions,
356
+ max_batch_size,
357
+ self.hidden_size)
358
+ hxcx = [(hx[idx].squeeze(0), cx[idx].squeeze(0)) for idx in range(self.num_layers)]
359
+ else:
360
+ hxcx = hidden_non_opt
361
+
362
+ hx_list = []
363
+ cx_list = []
364
+ for idx, layer in enumerate(self.layers):
365
+ x, (h, c) = layer(x, hxcx[idx])
366
+ hx_list.append(torch.jit._unwrap_optional(h))
367
+ cx_list.append(torch.jit._unwrap_optional(c))
368
+ hx_tensor = torch.stack(hx_list)
369
+ cx_tensor = torch.stack(cx_list)
370
+
371
+ # We are creating another dimension for bidirectional case
372
+ # need to collapse it
373
+ hx_tensor = hx_tensor.reshape(-1, hx_tensor.shape[-2], hx_tensor.shape[-1])
374
+ cx_tensor = cx_tensor.reshape(-1, cx_tensor.shape[-2], cx_tensor.shape[-1])
375
+
376
+ if self.batch_first:
377
+ x = x.transpose(0, 1)
378
+
379
+ return x, (hx_tensor, cx_tensor)
380
+
381
+ def _get_name(self):
382
+ return 'QuantizableLSTM'
383
+
384
+ @classmethod
385
+ def from_float(cls, other, qconfig=None):
386
+ assert isinstance(other, cls._FLOAT_MODULE)
387
+ assert (hasattr(other, 'qconfig') or qconfig)
388
+ observed = cls(other.input_size, other.hidden_size, other.num_layers,
389
+ other.bias, other.batch_first, other.dropout,
390
+ other.bidirectional)
391
+ observed.qconfig = getattr(other, 'qconfig', qconfig)
392
+ for idx in range(other.num_layers):
393
+ observed.layers[idx] = _LSTMLayer.from_float(other, idx, qconfig,
394
+ batch_first=False)
395
+
396
+ # Prepare the model
397
+ if other.training:
398
+ observed.train()
399
+ observed = torch.ao.quantization.prepare_qat(observed, inplace=True)
400
+ else:
401
+ observed.eval()
402
+ observed = torch.ao.quantization.prepare(observed, inplace=True)
403
+ return observed
404
+
405
+ @classmethod
406
+ def from_observed(cls, other):
407
+ # The whole flow is float -> observed -> quantized
408
+ # This class does float -> observed only
409
+ raise NotImplementedError("It looks like you are trying to convert a "
410
+ "non-quantizable LSTM module. Please, see "
411
+ "the examples on quantizable LSTMs.")
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__init__.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import functional
2
+ from .modules import * # noqa: F403
3
+ from .modules import MaxPool2d
4
+
5
+ __all__ = [
6
+ 'BatchNorm2d',
7
+ 'BatchNorm3d',
8
+ 'Conv1d',
9
+ 'Conv2d',
10
+ 'Conv3d',
11
+ 'ConvTranspose1d',
12
+ 'ConvTranspose2d',
13
+ 'ConvTranspose3d',
14
+ 'DeQuantize',
15
+ 'ELU',
16
+ 'Embedding',
17
+ 'EmbeddingBag',
18
+ 'GroupNorm',
19
+ 'Hardswish',
20
+ 'InstanceNorm1d',
21
+ 'InstanceNorm2d',
22
+ 'InstanceNorm3d',
23
+ 'LayerNorm',
24
+ 'LeakyReLU',
25
+ 'Linear',
26
+ 'LSTM',
27
+ 'MultiheadAttention',
28
+ 'Quantize',
29
+ 'ReLU6',
30
+ 'Sigmoid',
31
+ 'Softmax',
32
+ 'Dropout',
33
+ 'PReLU',
34
+ # Wrapper modules
35
+ 'FloatFunctional',
36
+ 'FXFloatFunctional',
37
+ 'QFunctional',
38
+ ]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (262 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from .linear import Linear
3
+ from .rnn import LSTM, GRU, LSTMCell, RNNCell, GRUCell
4
+ from .conv import Conv1d, Conv2d, Conv3d, ConvTranspose1d, ConvTranspose2d, ConvTranspose3d
5
+
6
+ __all__ = [
7
+ 'Linear',
8
+ 'LSTM',
9
+ 'GRU',
10
+ 'LSTMCell',
11
+ 'RNNCell',
12
+ 'GRUCell',
13
+ 'Conv1d',
14
+ 'Conv2d',
15
+ 'Conv3d',
16
+ 'ConvTranspose1d',
17
+ 'ConvTranspose2d',
18
+ 'ConvTranspose3d',
19
+ ]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc ADDED
Binary file (8.19 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/conv.py ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ r"""Dynamically quantized convolution modules."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+ from torch import Tensor
8
+ from torch._ops import ops
9
+ from torch.nn.common_types import _size_1_t
10
+ from torch.nn.modules.utils import _single, _pair, _triple
11
+ from torch.ao.nn.quantized.modules.conv import _reverse_repeat_padding
12
+ import torch.ao.nn.quantized as nnq
13
+ import warnings
14
+
15
+ __all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d']
16
+
17
+
18
+ class Conv1d(nnq.Conv1d):
19
+ r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.
20
+
21
+ For details on input arguments, parameters, and implementation see
22
+ :class:`~torch.nn.Conv1d` and :class:`~torch.ao.nn.quantized.dynamic.Conv1d` and
23
+
24
+ Attributes:
25
+ weight (Tensor): packed tensor derived from the learnable weight
26
+ parameter.
27
+ scale (Tensor): scalar for the output scale
28
+ zero_point (Tensor): scalar for the output zero point
29
+
30
+ See :class:`~torch.nn.Conv1d` for other attributes.
31
+
32
+ Examples::
33
+
34
+ >>> # xdoctest: +SKIP
35
+ >>> m = nn.quantized.dynamic.Conv1d(16, 33, 3, stride=2)
36
+ >>> input = torch.randn(20, 16, 100)
37
+ >>> output = m(input)
38
+
39
+ """
40
+
41
+ _FLOAT_MODULE = nn.Conv1d
42
+ _NNIQAT_CONV_BN_MODULE = None # type: ignore[assignment]
43
+ _NNI_CONV_RELU_MODULE = None # type: ignore[assignment]
44
+
45
+ def __init__(self,
46
+ in_channels: int,
47
+ out_channels: int,
48
+ kernel_size: _size_1_t,
49
+ stride: _size_1_t = 1,
50
+ padding: _size_1_t = 0,
51
+ dilation: _size_1_t = 1,
52
+ groups: int = 1,
53
+ bias: bool = True,
54
+ padding_mode: str = 'zeros',
55
+ device=None,
56
+ dtype=None,
57
+ reduce_range=True):
58
+ warnings.warn(
59
+ "The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
60
+ self._get_name()
61
+ )
62
+ )
63
+ factory_kwargs = {'device': device, 'dtype': dtype}
64
+ kernel_size = _single(kernel_size)
65
+ stride = _single(stride)
66
+ padding = padding if isinstance(padding, str) else _single(padding)
67
+ dilation = _single(dilation)
68
+
69
+ super().__init__(
70
+ in_channels, out_channels, kernel_size, stride, padding, dilation,
71
+ groups, bias, padding_mode, **factory_kwargs)
72
+
73
+ def _get_name(self):
74
+ return 'DynamicQuantizedConv1d'
75
+
76
+ def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
77
+ # Temporarily using len(shape) instead of ndim due to JIT issue
78
+ # https://github.com/pytorch/pytorch/issues/23890
79
+ if len(input.shape) != 3:
80
+ raise ValueError("Input shape must be `(N, C, L)`!")
81
+ if self.padding_mode != 'zeros':
82
+ # Padding in Conv1d is stored as (p, p), need to get (p,)
83
+ _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding[:1])
84
+ input = F.pad(input, _reversed_padding_repeated_twice,
85
+ mode=self.padding_mode)
86
+ return ops.quantized.conv1d_dynamic(input, self._packed_params, reduce_range)
87
+
88
+
89
class Conv2d(nnq.Conv2d):
    r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.Conv2d` and :class:`~torch.ao.nn.quantized.dynamic.Conv2d` and

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
            parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point

    See :class:`~torch.nn.Conv2d` for other attributes.

    Examples::

        >>> # xdoctest: +SKIP
        >>> # With square kernels and equal stride
        >>> m = nn.quantized.dynamic.Conv2d(16, 33, 3, stride=2)
        >>> # non-square kernels, unequal stride, padding and dilation
        >>> m = nn.quantized.dynamic.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
        >>> output = m(torch.randn(20, 16, 50, 100))

    """
    _FLOAT_MODULE = nn.Conv2d
    _NNIQAT_CONV_BN_MODULE = None  # type: ignore[assignment]
    _NNI_CONV_RELU_MODULE = None  # type: ignore[assignment]

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        warnings.warn(
            f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended"
        )
        # Scalars become symmetric pairs before reaching the quantized base.
        super().__init__(
            in_channels, out_channels, _pair(kernel_size), _pair(stride),
            _pair(padding), _pair(dilation), groups, bias, padding_mode,
            device=device, dtype=dtype)

    def _get_name(self):
        return 'DynamicQuantizedConv2d'

    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
        # len(shape) rather than .ndim: JIT workaround for
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 4:
            raise ValueError("Input shape must be `(N, C, H, W)`!")
        if self.padding_mode != 'zeros':
            # Non-zero padding modes are applied eagerly; the quantized op
            # itself only supports zero padding.
            pad_spec = _reverse_repeat_padding(self.padding)
            input = F.pad(input, pad_spec, mode=self.padding_mode)
        return ops.quantized.conv2d_dynamic(input, self._packed_params, reduce_range)
152
+
153
+
154
class Conv3d(nnq.Conv3d):
    r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.Conv3d` and :class:`~torch.ao.nn.quantized.dynamic.Conv3d` and

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
            parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point

    See :class:`~torch.nn.Conv3d` for other attributes.

    Examples::

        >>> # xdoctest: +SKIP
        >>> # With cubic kernels and equal stride
        >>> m = nn.quantized.dynamic.Conv3d(16, 33, 3, stride=2)
        >>> # non-cubic kernels, unequal stride, padding and dilation
        >>> m = nn.quantized.dynamic.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2), dilation=(1, 2, 2))
        >>> output = m(torch.randn(20, 16, 56, 56, 56))

    """
    _FLOAT_MODULE = nn.Conv3d
    _NNIQAT_CONV_BN_MODULE = None  # type: ignore[assignment]
    _NNI_CONV_RELU_MODULE = None  # type: ignore[assignment]

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        warnings.warn(
            f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended"
        )
        assert padding_mode != 'reflect', "Conv3d does not support reflection padding"
        # NOTE: the base class exposes `_init` (with explicit `transposed`
        # and `output_padding` slots) rather than plain __init__ here.
        super()._init(
            in_channels, out_channels, _triple(kernel_size), _triple(stride),
            _triple(padding), _triple(dilation),
            False, _triple(0), groups, bias, padding_mode,
            device=device, dtype=dtype)

    def _get_name(self):
        return 'DynamicQuantizedConv3d'

    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
        # len(shape) rather than .ndim: JIT workaround for
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 5:
            raise ValueError("Input shape must be `(N, C, D, H, W)`!")
        if self.padding_mode != 'zeros':
            pad_spec = _reverse_repeat_padding(self.padding)
            input = F.pad(input, pad_spec, mode=self.padding_mode)
        return ops.quantized.conv3d_dynamic(input, self._packed_params, reduce_range)
217
+
218
+
219
class ConvTranspose1d(nnq.ConvTranspose1d):
    r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.ConvTranspose1d`.

    For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv1d`

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
            parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point
    See :class:`~torch.nn.ConvTranspose1d` for other attributes.

    Examples::

        >>> # xdoctest: +SKIP
        >>> m = nndq.ConvTranspose1d(16, 33, 3, stride=2)
        >>> m = nndq.ConvTranspose1d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
        >>> output = m(input)
        >>> # exact output size can be also specified as an argument
        >>> downsample = nndq.Conv1d(16, 16, 3, stride=2, padding=1)
        >>> upsample = nndq.ConvTranspose1d(16, 16, 3, stride=2, padding=1)
        >>> h = downsample(input)
        >>> output = upsample(h, output_size=input.size())
    """

    _FLOAT_MODULE = nn.ConvTranspose1d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, output_padding=0, groups=1, bias=True,
                 dilation=1, padding_mode='zeros', device=None, dtype=None):
        warnings.warn(
            f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended"
        )
        super().__init__(
            in_channels, out_channels, kernel_size, stride, padding,
            output_padding, groups, bias, dilation, padding_mode,
            device=device, dtype=dtype)

    def _get_name(self):
        return 'DynamicQuantizedConvTranspose1d'

    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
        # len(shape) rather than .ndim: JIT workaround for
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 3:
            raise ValueError("Input shape must be `(N, C, L)`!")
        return torch.ops.quantized.conv_transpose1d_dynamic(
            input, self._packed_params, reduce_range)
278
+
279
+
280
class ConvTranspose2d(nnq.ConvTranspose2d):
    r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.ConvTranspose2d`.

    For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv2d`

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
            parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point
    See :class:`~torch.nn.ConvTranspose2d` for other attributes.

    Examples::

        >>> # xdoctest: +SKIP
        >>> m = nnq.ConvTranspose2d(16, 33, 3, stride=2)
        >>> m = nnq.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
        >>> output = m(input)
        >>> # exact output size can be also specified as an argument
        >>> downsample = nnq.Conv2d(16, 16, 3, stride=2, padding=1)
        >>> upsample = nnq.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
        >>> h = downsample(input)
        >>> output = upsample(h, output_size=input.size())
    """

    _FLOAT_MODULE = nn.ConvTranspose2d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, output_padding=0, groups=1, bias=True,
                 dilation=1, padding_mode='zeros', device=None, dtype=None):
        warnings.warn(
            f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended"
        )
        super().__init__(
            in_channels, out_channels, kernel_size, stride, padding,
            output_padding, groups, bias, dilation, padding_mode,
            device=device, dtype=dtype)

    def _get_name(self):
        return 'DynamicQuantizedConvTranspose2d'

    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
        # len(shape) rather than .ndim: JIT workaround for
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 4:
            raise ValueError("Input shape must be `(N, C, H, W)`!")
        return ops.quantized.conv_transpose2d_dynamic(
            input, self._packed_params, reduce_range)
339
+
340
+
341
class ConvTranspose3d(nnq.ConvTranspose3d):
    r"""A dynamically quantized transposed convolution module with floating point tensors as inputs and outputs.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.ConvTranspose3d`.

    For special notes, please, see :class:`~torch.ao.nn.quantized.dynamic.Conv3d`

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
            parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point
    See :class:`~torch.nn.ConvTranspose3d` for other attributes.

    Examples::

        >>> # xdoctest: +SKIP
        >>> m = nnq.ConvTranspose3d(16, 33, 3, stride=2)
        >>> m = nnq.ConvTranspose3d(16, 33, (3, 3, 5), stride=(2, 1, 1), padding=(4, 2, 2))
        >>> output = m(input)
        >>> # exact output size can be also specified as an argument
        >>> downsample = nnq.Conv3d(16, 16, 3, stride=2, padding=1)
        >>> upsample = nnq.ConvTranspose3d(16, 16, 3, stride=2, padding=1)
        >>> h = downsample(input)
        >>> output = upsample(h, output_size=input.size())
    """

    _FLOAT_MODULE = nn.ConvTranspose3d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, output_padding=0, groups=1, bias=True,
                 dilation=1, padding_mode='zeros', device=None, dtype=None):
        warnings.warn(
            f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended"
        )
        super().__init__(
            in_channels, out_channels, kernel_size, stride, padding,
            output_padding, groups, bias, dilation, padding_mode,
            device=device, dtype=dtype)

    def _get_name(self):
        return 'DynamicQuantizedConvTranspose3d'

    def forward(self, input: Tensor, reduce_range: bool = True) -> Tensor:
        # len(shape) rather than .ndim: JIT workaround for
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 5:
            raise ValueError("Input shape must be `(N, C, T, H, W)`!")
        return ops.quantized.conv_transpose3d_dynamic(
            input, self._packed_params, reduce_range)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.ao.nn.quantized as nnq
3
+ from torch.ao.nn.quantized.modules.utils import _quantize_weight
4
+ import torch.ao.nn.intrinsic as nni
5
+
6
+ __all__ = [
7
+ "Linear",
8
+ ]
9
+
10
+
11
class Linear(nnq.Linear):
    r"""
    A dynamic quantized linear module with floating point tensor as inputs and outputs.
    We adopt the same interface as `torch.nn.Linear`, please see
    https://pytorch.org/docs/stable/nn.html#torch.nn.Linear for documentation.

    Similar to :class:`torch.nn.Linear`, attributes will be randomly
    initialized at module creation time and will be overwritten later

    Attributes:
        weight (Tensor): the non-learnable quantized weights of the module which are of
                         shape :math:`(\text{out\_features}, \text{in\_features})`.
        bias (Tensor): the non-learnable floating point bias of the module of shape
                       :math:`(\text{out\_features})`. If :attr:`bias` is ``True``,
                       the values are initialized to zero.

    Examples::

        >>> # xdoctest: +SKIP
        >>> m = nn.quantized.dynamic.Linear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """
    # version used in this class is different from the parent class nnq.Linear
    _version = 4

    def __init__(self, in_features, out_features, bias_=True, dtype=torch.qint8):
        """Initialize via the static quantized Linear; `dtype` selects the
        qint8 or float16 dynamic kernel used in forward()."""
        super().__init__(in_features, out_features, bias_, dtype=dtype)
        # We don't muck around with buffers or attributes or anything here
        # to keep the module simple. *everything* is simply a Python attribute.
        # Serialization logic is explicitly handled in the below serialization and
        # deserialization modules
        self.version = 4

    def forward(self, x):
        """Dynamically quantize `x`, run the packed linear, and return a float
        tensor cast back to `x.dtype`."""
        # Note that we can handle self.bias == None case.
        if self._packed_params.dtype == torch.qint8:
            # Checkpoints serialized before version 4 did not use
            # reduce_range; keep their numerics when reloading them.
            if self.version is None or self.version < 4:
                Y = torch.ops.quantized.linear_dynamic(
                    x, self._packed_params._packed_params)
            else:
                Y = torch.ops.quantized.linear_dynamic(
                    x, self._packed_params._packed_params, reduce_range=True)
        elif self._packed_params.dtype == torch.float16:
            Y = torch.ops.quantized.linear_dynamic_fp16(
                x, self._packed_params._packed_params)
        else:
            raise RuntimeError('Unsupported dtype on dynamic quantized linear!')
        return Y.to(x.dtype)

    def _get_name(self):
        return 'DynamicQuantizedLinear'

    def extra_repr(self):
        extra_repr_str = 'in_features={}, out_features={}, dtype={}'.format(
            self.in_features, self.out_features, self._packed_params.dtype
        )
        # qscheme is only meaningful for quantized (qint8) weights.
        if self._packed_params.dtype == torch.qint8:
            extra_repr_str += f', qscheme={self.weight().qscheme()}'
        return extra_repr_str

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Capture the serialized version so forward() can pick the matching
        # kernel behavior; load non-strictly since packed params are custom.
        version = local_metadata.get('version', None)
        self.version = version
        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
                                      missing_keys, unexpected_keys, error_msgs)

    @classmethod
    def from_float(cls, mod):
        r"""Create a dynamic quantized module from a float module or qparams_dict

        Args:
            mod (Module): a float module, either produced by torch.ao.quantization
                          utilities or provided by the user
        """
        float_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear,
                         torch.ao.nn.intrinsic.modules.fused.LinearReLU, torch.ao.nn.qat.dynamic.Linear]

        assert type(mod) in float_modules, \
            'nn.quantized.dynamic.Linear.from_float only works for one of' + \
            str([float_mod.__name__ for float_mod in float_modules])
        assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
        # For a fused Linear+ReLU, quantize only the Linear half.
        if type(mod) == nni.LinearReLU:
            mod = mod[0]
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
            # import until we need it.
            from torch.ao.quantization.qconfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()
        dtype = weight_observer.dtype
        assert dtype in [torch.qint8, torch.float16], "The only supported dtypes for " \
            f"dynamic quantized linear are qint8 and float16 got: {dtype}"
        # Run the observer over the float weight to collect quantization stats.
        weight_observer(mod.weight)
        if dtype == torch.qint8:
            qweight = _quantize_weight(mod.weight.float(), weight_observer)
        elif dtype == torch.float16:
            # fp16 path stores the float weight directly; packing casts it.
            qweight = mod.weight.float()
        else:
            raise RuntimeError('Unsupported dtype specified for dynamic quantized Linear!')
        qlinear = cls(mod.in_features, mod.out_features, dtype=dtype)
        qlinear.set_weight_bias(qweight, mod.bias)
        return qlinear

    @classmethod
    def from_reference(cls, ref_qlinear):
        """ Create a (fbgemm/qnnpack) dynamic quantized module from a reference quantized
        module
        Args:
            ref_qlinear (Module): a reference quantized  module, either produced by
            torch.ao.quantization functions or provided by the user
        """
        qlinear = cls(ref_qlinear.in_features, ref_qlinear.out_features, dtype=ref_qlinear.weight_dtype)
        qweight = ref_qlinear.get_quantized_weight()
        bias = ref_qlinear.bias
        qlinear.set_weight_bias(qweight, bias)
        return qlinear
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/rnn.py ADDED
@@ -0,0 +1,1096 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numbers
2
+ import warnings
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch import Tensor # noqa: F401
7
+ from torch._jit_internal import Tuple, Optional, List, Union, Dict # noqa: F401
8
+ from torch.nn.utils.rnn import PackedSequence
9
+ from torch.ao.nn.quantized.modules.utils import _quantize_weight
10
+
11
+ __all__ = ['pack_weight_bias', 'PackedParameter', 'RNNBase', 'LSTM', 'GRU', 'RNNCellBase', 'RNNCell', 'LSTMCell',
12
+ 'GRUCell', "apply_permutation"]
13
+
14
+
15
+ def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
16
+ return tensor.index_select(dim, permutation)
17
+
18
+
19
def apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
    """Deprecated public wrapper; kept for backward compatibility only."""
    warnings.warn("apply_permutation is deprecated, please use tensor.index_select(dim, permutation) instead")
    return _apply_permutation(tensor, permutation, dim)
22
+
23
+
24
def pack_weight_bias(qweight, bias, dtype):
    """Prepack a (weight, bias) pair for the dynamic quantized linear kernels.

    For ``torch.qint8`` the weight must already be a quantized tensor and is
    packed with ``linear_prepack``; any other dtype takes the fp16 path, which
    packs a float weight with ``linear_prepack_fp16``. Returns the opaque
    packed-params object consumed by the quantized ops.
    """
    if dtype == torch.qint8:
        # For each layer/direction, weights are quantized and packed in the
        # order: w_ih, w_hh.
        return torch.ops.quantized.linear_prepack(qweight, bias)
    # Non-qint8 (fp16) packing order: packed_ih, packed_hh, b_ih, b_hh.
    return torch.ops.quantized.linear_prepack_fp16(qweight, bias)
44
+
45
+
46
class PackedParameter(torch.nn.Module):
    """Minimal nn.Module wrapper around an opaque prepacked cell-params object.

    Dynamic RNN modules keep their packed weights in a ModuleList of these so
    the packed blobs participate in state_dict save/load; the blob itself is a
    plain Python attribute, not a Parameter or buffer, hence the custom hooks.
    """
    def __init__(self, param):
        super().__init__()
        # Opaque packed cell-params object produced by the quantized prepack ops.
        self.param = param

    def _save_to_state_dict(self, destination, prefix, keep_vars):
        super()._save_to_state_dict(destination, prefix, keep_vars)
        # Store the packed blob directly under '<prefix>param'.
        destination[prefix + 'param'] = self.param

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Consume our custom key first, then delegate non-strictly so the base
        # implementation does not flag '<prefix>param' as unexpected.
        self.param = state_dict[prefix + 'param']
        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
                                      missing_keys, unexpected_keys, error_msgs)
60
+
61
+
62
+ class RNNBase(torch.nn.Module):
63
+
64
+ _FLOAT_MODULE = nn.RNNBase
65
+
66
+ _version = 2
67
+
68
    def __init__(self, mode, input_size, hidden_size,
                 num_layers=1, bias=True, batch_first=False,
                 dropout=0., bidirectional=False, dtype=torch.qint8):
        """Build a dynamic quantized RNN base ('LSTM' or 'GRU' mode).

        Weights are initialized randomly and packed immediately; real values
        are expected to be installed later via set_weight_bias()/from_float.
        `dtype` selects qint8 (quantize + linear_prepack) or fp16 packing.
        """
        super().__init__()

        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = float(dropout)
        self.bidirectional = bidirectional
        self.dtype = dtype
        # Serialization version; checkpoints < 2 use the legacy cell-params op
        # signature (no reduce_range flag).
        self.version = 2
        self.training = False
        num_directions = 2 if bidirectional else 1

        # "type: ignore" is required since ints and Numbers are not fully comparable
        # https://github.com/python/mypy/issues/8566
        if not isinstance(dropout, numbers.Number) \
                or not 0 <= dropout <= 1 or isinstance(dropout, bool):  # type: ignore[operator]
            raise ValueError("dropout should be a number in range [0, 1] "
                             "representing the probability of an element being "
                             "zeroed")
        if dropout > 0 and num_layers == 1:  # type: ignore[operator]
            warnings.warn("dropout option adds dropout after all but last "
                          "recurrent layer, so non-zero dropout expects "
                          f"num_layers greater than 1, but got dropout={dropout} and "
                          f"num_layers={num_layers}")

        # LSTM has 4 gates (i, f, g, o); GRU has 3 (r, z, n).
        if mode == 'LSTM':
            gate_size = 4 * hidden_size
        elif mode == 'GRU':
            gate_size = 3 * hidden_size
        else:
            raise ValueError("Unrecognized RNN mode: " + mode)

        _all_weight_values = []
        for layer in range(num_layers):
            for direction in range(num_directions):
                # Layers above the first consume the (possibly bidirectional)
                # hidden state of the previous layer.
                layer_input_size = input_size if layer == 0 else hidden_size * num_directions

                # Placeholder random weights; overwritten by set_weight_bias().
                w_ih = torch.randn(gate_size, layer_input_size).to(torch.float)
                w_hh = torch.randn(gate_size, hidden_size).to(torch.float)
                b_ih = torch.randn(gate_size).to(torch.float)
                b_hh = torch.randn(gate_size).to(torch.float)
                if dtype == torch.qint8:
                    w_ih = torch.quantize_per_tensor(w_ih, scale=0.1, zero_point=0, dtype=torch.qint8)
                    w_hh = torch.quantize_per_tensor(w_hh, scale=0.1, zero_point=0, dtype=torch.qint8)
                    packed_ih = \
                        torch.ops.quantized.linear_prepack(w_ih, b_ih)
                    packed_hh = \
                        torch.ops.quantized.linear_prepack(w_hh, b_hh)
                    # Version gate: v2+ passes reduce_range=True to the op.
                    if self.version is None or self.version < 2:
                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                            packed_ih, packed_hh, b_ih, b_hh)
                    else:
                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                            packed_ih, packed_hh, b_ih, b_hh, True)
                else:
                    # fp16 path: pack float weights directly.
                    packed_ih = torch.ops.quantized.linear_prepack_fp16(w_ih, b_ih)
                    packed_hh = torch.ops.quantized.linear_prepack_fp16(w_hh, b_hh)
                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
                        packed_ih, packed_hh)

                _all_weight_values.append(PackedParameter(cell_params))
        # ModuleList so each PackedParameter joins state_dict serialization.
        self._all_weight_values = torch.nn.ModuleList(_all_weight_values)
136
+
137
+ def _get_name(self):
138
+ return 'DynamicQuantizedRNN'
139
+
140
+ def extra_repr(self):
141
+ s = '{input_size}, {hidden_size}'
142
+ if self.num_layers != 1:
143
+ s += ', num_layers={num_layers}'
144
+ if self.bias is not True:
145
+ s += ', bias={bias}'
146
+ if self.batch_first is not False:
147
+ s += ', batch_first={batch_first}'
148
+ if self.dropout != 0:
149
+ s += ', dropout={dropout}'
150
+ if self.bidirectional is not False:
151
+ s += ', bidirectional={bidirectional}'
152
+ return s.format(**self.__dict__)
153
+
154
    def __repr__(self):
        # We don't want to show `ModuleList` children, hence custom
        # `__repr__`. This is the same as nn.Module.__repr__, except the check
        # for the `PackedParameter` and `nn.ModuleList`.
        # You should still override `extra_repr` to add more info.
        extra_lines = []
        extra_repr = self.extra_repr()
        # empty string will be split into list ['']
        if extra_repr:
            extra_lines = extra_repr.split('\n')
        child_lines = []
        for key, module in self._modules.items():
            # Skip packed-weight containers: their repr is opaque/noisy.
            if isinstance(module, (PackedParameter, nn.ModuleList)):
                continue
            mod_str = repr(module)
            mod_str = nn.modules.module._addindent(mod_str, 2)
            child_lines.append('(' + key + '): ' + mod_str)
        lines = extra_lines + child_lines

        main_str = self._get_name() + '('
        if lines:
            # simple one-liner info, which most builtin Modules will use
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += '\n  ' + '\n  '.join(lines) + '\n'

        main_str += ')'
        return main_str
183
+
184
+ def check_input(self, input: Tensor, batch_sizes: Optional[Tensor]) -> None:
185
+ expected_input_dim = 2 if batch_sizes is not None else 3
186
+ if input.dim() != expected_input_dim:
187
+ raise RuntimeError(
188
+ f'input must have {expected_input_dim} dimensions, got {input.dim()}')
189
+ if self.input_size != input.size(-1):
190
+ raise RuntimeError(
191
+ f'input.size(-1) must be equal to input_size. Expected {self.input_size}, got {input.size(-1)}')
192
+
193
+ def get_expected_hidden_size(self, input: Tensor, batch_sizes: Optional[Tensor]) -> Tuple[int, int, int]:
194
+ if batch_sizes is not None:
195
+ mini_batch = int(batch_sizes[0])
196
+ else:
197
+ mini_batch = input.size(0) if self.batch_first else input.size(1)
198
+ num_directions = 2 if self.bidirectional else 1
199
+ expected_hidden_size = (self.num_layers * num_directions,
200
+ mini_batch, self.hidden_size)
201
+ return expected_hidden_size
202
+
203
+ def check_hidden_size(
204
+ self, hx: Tensor, expected_hidden_size: Tuple[int, int, int],
205
+ msg: str = 'Expected hidden size {}, got {}'
206
+ ) -> None:
207
+ if hx.size() != expected_hidden_size:
208
+ raise RuntimeError(msg.format(
209
+ expected_hidden_size, list(hx.size())))
210
+
211
+ def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]) -> None:
212
+ self.check_input(input, batch_sizes)
213
+ expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
214
+ self.check_hidden_size(hidden, expected_hidden_size,
215
+ msg='Expected hidden size {}, got {}')
216
+
217
+ def permute_hidden(self, hx: Tensor, permutation: Optional[Tensor]) -> Tensor:
218
+ if permutation is None:
219
+ return hx
220
+ return _apply_permutation(hx, permutation)
221
+
222
+ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
223
+ missing_keys, unexpected_keys, error_msgs):
224
+ version = local_metadata.get('version', None)
225
+ self.version = version
226
+ super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
227
+ missing_keys, unexpected_keys, error_msgs)
228
+
229
    def set_weight_bias(self, weight_bias_dict):
        """Install real weights/biases from a dict keyed like nn.RNN state
        (e.g. 'weight_ih_l0', 'bias_hh_l1_reverse'), re-packing every
        layer/direction's cell params. Mirrors the packing in __init__."""

        def weight_bias_name(ihhh, layer, suffix):
            # Build the canonical key pair for one gate-matrix slot.
            weight_name = f"weight_{ihhh}_l{layer}{suffix}"
            bias_name = f"bias_{ihhh}_l{layer}{suffix}"
            return weight_name, bias_name

        num_directions = 2 if self.bidirectional else 1
        # TODO: dedup with __init__ of RNNBase
        _all_weight_values = []
        for layer in range(self.num_layers):
            for direction in range(num_directions):
                suffix = "_reverse" if direction == 1 else ""
                w_ih_name, b_ih_name = weight_bias_name("ih", layer, suffix)
                w_hh_name, b_hh_name = weight_bias_name("hh", layer, suffix)
                w_ih = weight_bias_dict[w_ih_name]
                b_ih = weight_bias_dict[b_ih_name]
                w_hh = weight_bias_dict[w_hh_name]
                b_hh = weight_bias_dict[b_hh_name]
                # Dispatch on the weight's dtype: qint8 tensors were already
                # quantized by the caller; anything else takes the fp16 path.
                if w_ih.dtype == torch.qint8:
                    packed_ih = torch.ops.quantized.linear_prepack(w_ih, b_ih)
                    packed_hh = torch.ops.quantized.linear_prepack(w_hh, b_hh)
                    # Version gate matches __init__: v2+ passes reduce_range=True.
                    if self.version is None or self.version < 2:
                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                            packed_ih, packed_hh, b_ih, b_hh)
                    else:
                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                            packed_ih, packed_hh, b_ih, b_hh, True)
                else:
                    packed_ih = torch.ops.quantized.linear_prepack_fp16(w_ih, b_ih)
                    packed_hh = torch.ops.quantized.linear_prepack_fp16(w_hh, b_hh)
                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
                        packed_ih, packed_hh)

                _all_weight_values.append(PackedParameter(cell_params))
        # Replace the whole ModuleList so serialization sees the new packing.
        self._all_weight_values = torch.nn.ModuleList(_all_weight_values)
265
+
266
    @classmethod
    def from_float(cls, mod):
        """Create a dynamic quantized LSTM/GRU from a float ``nn.LSTM``/``nn.GRU``.

        The float module must carry a ``qconfig``; its weight observer
        determines the quantization dtype (qint8 or float16).
        """
        assert type(mod) in {torch.nn.LSTM,
                             torch.nn.GRU}, 'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM and nn.GRU'
        assert hasattr(
            mod,
            'qconfig'
        ), 'Input float module must have qconfig defined'

        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer_method = mod.qconfig.weight
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
            # import until we need it.
            from torch.ao.quantization.qconfig import default_dynamic_qconfig
            weight_observer_method = default_dynamic_qconfig.weight

        dtype = weight_observer_method().dtype
        supported_scalar_types = [torch.qint8, torch.float16]
        if dtype not in supported_scalar_types:
            raise RuntimeError(f'Unsupported dtype for dynamic RNN quantization: {dtype}')
        # RNNBase can be either LSTM or GRU
        qRNNBase: Union[LSTM, GRU]
        if mod.mode == 'LSTM':
            qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers,
                            mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype)
        elif mod.mode == 'GRU':
            qRNNBase = GRU(mod.input_size, mod.hidden_size, mod.num_layers,
                           mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype)
        else:
            raise NotImplementedError('Only LSTM/GRU is supported for QuantizedRNN for now')

        num_directions = 2 if mod.bidirectional else 1

        # Dynamic quantized RNNs require biases (packed together with weights).
        assert mod.bias

        _all_weight_values = []
        for layer in range(qRNNBase.num_layers):
            for direction in range(num_directions):
                suffix = '_reverse' if direction == 1 else ''

                def retrieve_weight_bias(ihhh):
                    # Closure over `layer`/`suffix` of the current iteration;
                    # called immediately below, so late binding is not an issue.
                    weight_name = f'weight_{ihhh}_l{layer}{suffix}'
                    bias_name = f'bias_{ihhh}_l{layer}{suffix}'
                    weight = getattr(mod, weight_name)
                    bias = getattr(mod, bias_name)
                    return weight, bias

                weight_ih, bias_ih = retrieve_weight_bias('ih')
                weight_hh, bias_hh = retrieve_weight_bias('hh')

                if dtype == torch.qint8:
                    def quantize_and_pack(w, b):
                        # Observe, quantize and prepack one weight tensor.
                        weight_observer = weight_observer_method()
                        weight_observer(w)
                        qweight = _quantize_weight(w.float(), weight_observer)
                        packed_weight = \
                            torch.ops.quantized.linear_prepack(qweight, b)
                        return packed_weight
                    packed_ih = quantize_and_pack(weight_ih, bias_ih)
                    packed_hh = quantize_and_pack(weight_hh, bias_hh)
                    # NOTE(review): version >= 2 passes an extra True flag —
                    # mirrors the branch in set_weight_bias; confirm semantics
                    # against the op schema.
                    if qRNNBase.version is None or qRNNBase.version < 2:
                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                            packed_ih, packed_hh, bias_ih, bias_hh)
                    else:
                        cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                            packed_ih, packed_hh, bias_ih, bias_hh, True)

                elif dtype == torch.float16:
                    packed_ih = torch.ops.quantized.linear_prepack_fp16(
                        weight_ih.float(), bias_ih)
                    packed_hh = torch.ops.quantized.linear_prepack_fp16(
                        weight_hh.float(), bias_hh)

                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
                        packed_ih, packed_hh)
                else:
                    raise RuntimeError('Unsupported dtype specified for dynamic quantized LSTM!')

                _all_weight_values.append(PackedParameter(cell_params))
        qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values)

        return qRNNBase
350
+
351
    def _weight_bias(self):
        # Returns a dict of weights and biases
        # keyed as {'weight': {name: tensor}, 'bias': {name: tensor}} using the
        # same canonical names as the float module (weight_ih_l0, bias_hh_l1_reverse, ...).
        weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}}
        count = 0
        num_directions = 2 if self.bidirectional else 1
        for layer in range(self.num_layers):
            for direction in range(num_directions):
                suffix = '_reverse' if direction == 1 else ''
                key_name1 = f'weight_ih_l{layer}{suffix}'
                key_name2 = f'weight_hh_l{layer}{suffix}'
                # packed weights are part of torchbind class, CellParamsSerializationType
                # Within the packed weight class, the weight and bias are accessible as Tensors
                # NOTE(review): the __getstate__()[0][4] / [0][0] / [0][1] index
                # chains depend on the torchbind serialization layout — verify
                # against CellParamsSerializationType if the format changes.
                packed_weight_bias = self._all_weight_values[count].param.__getstate__()[0][4]
                weight_bias_dict['weight'][key_name1] = packed_weight_bias[0].__getstate__()[0][0]
                weight_bias_dict['weight'][key_name2] = packed_weight_bias[1].__getstate__()[0][0]
                key_name1 = f'bias_ih_l{layer}{suffix}'
                key_name2 = f'bias_hh_l{layer}{suffix}'
                weight_bias_dict['bias'][key_name1] = packed_weight_bias[0].__getstate__()[0][1]
                weight_bias_dict['bias'][key_name2] = packed_weight_bias[1].__getstate__()[0][1]
                count = count + 1
        return weight_bias_dict
372
+
373
+ def get_weight(self):
374
+ return self._weight_bias()['weight']
375
+
376
+ def get_bias(self):
377
+ return self._weight_bias()['bias']
378
+
379
+
380
class LSTM(RNNBase):
    r"""
    A dynamic quantized LSTM module with floating point tensor as inputs and outputs.
    We adopt the same interface as `torch.nn.LSTM`, please see
    https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM for documentation.

    Examples::

        >>> # xdoctest: +SKIP
        >>> rnn = nn.LSTM(10, 20, 2)
        >>> input = torch.randn(5, 3, 10)
        >>> h0 = torch.randn(2, 3, 20)
        >>> c0 = torch.randn(2, 3, 20)
        >>> output, (hn, cn) = rnn(input, (h0, c0))
    """
    _FLOAT_MODULE = nn.LSTM

    __overloads__ = {'forward': ['forward_packed', 'forward_tensor']}

    def __init__(self, *args, **kwargs):
        super().__init__('LSTM', *args, **kwargs)

    def _get_name(self):
        return 'DynamicQuantizedLSTM'

    def forward_impl(
        self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]],
        batch_sizes: Optional[Tensor], max_batch_size: int,
        sorted_indices: Optional[Tensor]
    ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
        """Shared forward path for both tensor and packed-sequence inputs."""
        if hx is None:
            # Default to zero-initialized (h, c) states.
            num_directions = 2 if self.bidirectional else 1
            zeros = torch.zeros(self.num_layers * num_directions,
                                max_batch_size, self.hidden_size,
                                dtype=input.dtype, device=input.device)
            hx = (zeros, zeros)
        else:
            # Each batch of the hidden state should match the input sequence that
            # the user believes he/she is passing in.
            hx = self.permute_hidden(hx, sorted_indices)

        self.check_forward_args(input, hx, batch_sizes)

        _all_params = ([m.param for m in self._all_weight_values])
        if batch_sizes is None:
            result = torch.quantized_lstm(input, hx, _all_params, self.bias, self.num_layers,
                                          float(self.dropout), self.training, self.bidirectional,
                                          self.batch_first, dtype=self.dtype, use_dynamic=True)
        else:
            result = torch.quantized_lstm(input, batch_sizes, hx, _all_params, self.bias,
                                          self.num_layers, float(self.dropout), self.training,
                                          self.bidirectional, dtype=self.dtype, use_dynamic=True)
        output = result[0]
        hidden = result[1:]

        return output, hidden

    @torch.jit.export
    def forward_tensor(
        self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None
    ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
        """Forward for a plain (non-packed) input tensor."""
        batch_sizes = None
        max_batch_size = input.size(0) if self.batch_first else input.size(1)
        sorted_indices = None
        unsorted_indices = None

        output, hidden = self.forward_impl(
            input, hx, batch_sizes, max_batch_size, sorted_indices)

        return output, self.permute_hidden(hidden, unsorted_indices)

    @torch.jit.export
    def forward_packed(
        self, input: PackedSequence, hx: Optional[Tuple[Tensor, Tensor]] = None
    ) -> Tuple[PackedSequence, Tuple[Tensor, Tensor]]:
        """Forward for a :class:`~torch.nn.utils.rnn.PackedSequence` input."""
        input_, batch_sizes, sorted_indices, unsorted_indices = input
        max_batch_size = int(batch_sizes[0])

        output_, hidden = self.forward_impl(
            input_, hx, batch_sizes, max_batch_size, sorted_indices
        )

        output = PackedSequence(output_, batch_sizes,
                                sorted_indices, unsorted_indices)
        return output, self.permute_hidden(hidden, unsorted_indices)

    # "type: ignore" is required due to issue #43072
    def permute_hidden(  # type: ignore[override]
        self, hx: Tuple[Tensor, Tensor], permutation: Optional[Tensor]
    ) -> Tuple[Tensor, Tensor]:
        """Permute both h and c states along the batch dimension."""
        if permutation is None:
            return hx
        return _apply_permutation(hx[0], permutation), _apply_permutation(hx[1], permutation)

    # "type: ignore" is required due to issue #43072
    def check_forward_args(  # type: ignore[override]
        self, input: Tensor, hidden: Tuple[Tensor, Tensor], batch_sizes: Optional[Tensor]
    ) -> None:
        """Validate input and both hidden-state tensors; raises on mismatch."""
        self.check_input(input, batch_sizes)
        expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)

        self.check_hidden_size(hidden[0], expected_hidden_size,
                               'Expected hidden[0] size {}, got {}')
        self.check_hidden_size(hidden[1], expected_hidden_size,
                               'Expected hidden[1] size {}, got {}')

    @torch.jit.ignore
    def forward(self, input, hx=None):
        # Dispatch on the input type; TorchScript uses the __overloads__ map.
        if isinstance(input, PackedSequence):
            return self.forward_packed(input, hx)
        else:
            return self.forward_tensor(input, hx)

    @classmethod
    def from_float(cls, mod):
        return super().from_float(mod)

    @classmethod
    def from_reference(cls, ref_mod):
        """Create a dynamic quantized LSTM from a reference quantized module."""
        # BUGFIX: the message was previously written as two adjacent string
        # literals on separate lines, making the second line a no-op
        # expression statement; parenthesize so the full message reaches the
        # assert.
        assert hasattr(ref_mod, "weight_ih_l0_dtype"), (
            "We are assuming weight_ih_l0 "
            "exists in LSTM, may need to relax the assumption to support the use case"
        )
        qmod = cls(
            ref_mod.input_size,
            ref_mod.hidden_size,
            ref_mod.num_layers,
            ref_mod.bias,
            ref_mod.batch_first,
            ref_mod.dropout,
            ref_mod.bidirectional,
            # assuming there is layer 0, which should be OK
            ref_mod.weight_ih_l0_dtype,
        )
        qmod.set_weight_bias(ref_mod.get_quantized_weight_bias_dict())
        return qmod
514
+
515
+
516
class GRU(RNNBase):
    r"""
    A dynamic quantized GRU module with floating point tensor as inputs and outputs.

    Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
    We adopt the same interface as `torch.nn.GRU`, please see
    https://pytorch.org/docs/stable/nn.html#torch.nn.GRU for full documentation
    of the arguments, input/output shapes and attributes.

    Args:
        input_size: The number of expected features in the input `x`
        hidden_size: The number of features in the hidden state `h`
        num_layers: Number of recurrent layers. Default: 1
        bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
            Default: ``True``
        batch_first: If ``True``, then the input and output tensors are provided
            as (batch, seq, feature). Default: ``False``
        dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
            GRU layer except the last layer. Default: 0
        bidirectional: If ``True``, becomes a bidirectional GRU. Default: ``False``

    .. note::
        As in :class:`torch.nn.GRU`, the new gate :math:`n_t` is computed as

        .. math::
            n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn}))

        i.e. the Hadamard product with :math:`r_t` is applied *after*
        :math:`W_{hn} h_{(t-1)}`, which subtly differs from the original paper
        (where :math:`r_t \odot h_{(t-1)}` is multiplied by :math:`W_{hn}`).
        This implementation differs on purpose for efficiency.

    Examples::

        >>> # xdoctest: +SKIP
        >>> rnn = nn.GRU(10, 20, 2)
        >>> input = torch.randn(5, 3, 10)
        >>> h0 = torch.randn(2, 3, 20)
        >>> output, hn = rnn(input, h0)
    """
    _FLOAT_MODULE = nn.GRU

    __overloads__ = {'forward': ['forward_packed', 'forward_tensor']}

    def __init__(self, *args, **kwargs):
        super().__init__('GRU', *args, **kwargs)

    def _get_name(self):
        return 'DynamicQuantizedGRU'

    def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]) -> None:
        """Validate input and hidden-state shapes; raises on mismatch."""
        self.check_input(input, batch_sizes)
        expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)

        self.check_hidden_size(hidden, expected_hidden_size,
                               'Expected hidden size {}, got {}')

    def forward_impl(
        self, input: Tensor, hx: Optional[Tensor],
        batch_sizes: Optional[Tensor], max_batch_size: int,
        sorted_indices: Optional[Tensor]
    ) -> Tuple[Tensor, Tensor]:
        """Shared forward path for both tensor and packed-sequence inputs."""
        if hx is None:
            # Default to a zero-initialized hidden state.
            num_directions = 2 if self.bidirectional else 1
            zeros = torch.zeros(self.num_layers * num_directions,
                                max_batch_size, self.hidden_size,
                                dtype=input.dtype, device=input.device)
            hx = zeros
        else:
            # Each batch of the hidden state should match the input sequence that
            # the user believes he/she is passing in.
            hx = self.permute_hidden(hx, sorted_indices)

        self.check_forward_args(input, hx, batch_sizes)

        _all_params = ([m.param for m in self._all_weight_values])
        if batch_sizes is None:
            result = torch.quantized_gru(input,
                                         hx,
                                         _all_params,
                                         self.bias,
                                         self.num_layers,
                                         self.dropout,
                                         self.training,
                                         self.bidirectional,
                                         self.batch_first)
        else:
            result = torch.quantized_gru(input,
                                         batch_sizes,
                                         hx,
                                         _all_params,
                                         self.bias,
                                         self.num_layers,
                                         self.dropout,
                                         self.training,
                                         self.bidirectional)
        output = result[0]
        hidden = result[1]

        return output, hidden

    @torch.jit.export
    def forward_tensor(
        self, input: Tensor, hx: Optional[Tensor] = None
    ) -> Tuple[Tensor, Tensor]:
        """Forward for a plain (non-packed) input tensor."""
        batch_sizes = None
        max_batch_size = input.size(0) if self.batch_first else input.size(1)
        sorted_indices = None
        unsorted_indices = None

        output, hidden = self.forward_impl(
            input, hx, batch_sizes, max_batch_size, sorted_indices)

        return output, self.permute_hidden(hidden, unsorted_indices)

    @torch.jit.export
    def forward_packed(
        self, input: PackedSequence, hx: Optional[Tensor] = None
    ) -> Tuple[PackedSequence, Tensor]:
        """Forward for a :class:`~torch.nn.utils.rnn.PackedSequence` input."""
        input_, batch_sizes, sorted_indices, unsorted_indices = input
        max_batch_size = int(batch_sizes[0])
        output_, hidden = self.forward_impl(
            input_, hx, batch_sizes, max_batch_size, sorted_indices
        )

        output = PackedSequence(output_, batch_sizes,
                                sorted_indices, unsorted_indices)
        return output, self.permute_hidden(hidden, unsorted_indices)

    def permute_hidden(
        self, hx: Tensor, permutation: Optional[Tensor]
    ) -> Tensor:
        """Permute the hidden state along the batch dimension (no-op if None)."""
        if permutation is None:
            return hx
        return _apply_permutation(hx, permutation)

    @torch.jit.ignore
    def forward(self, input, hx=None):
        # Dispatch on the input type; TorchScript uses the __overloads__ map.
        if isinstance(input, PackedSequence):
            return self.forward_packed(input, hx)
        else:
            return self.forward_tensor(input, hx)

    @classmethod
    def from_float(cls, mod):
        return super().from_float(mod)

    @classmethod
    def from_reference(cls, ref_mod):
        """Create a dynamic quantized GRU from a reference quantized module."""
        # BUGFIX: the message was previously split into two adjacent string
        # literals on separate lines (the second line was a no-op statement)
        # and said "LSTM" in the GRU class; parenthesize and correct it.
        assert hasattr(ref_mod, "weight_ih_l0_dtype"), (
            "We are assuming weight_ih_l0 "
            "exists in GRU, may need to relax the assumption to support the use case"
        )
        qmod = cls(
            ref_mod.input_size,
            ref_mod.hidden_size,
            ref_mod.num_layers,
            ref_mod.bias,
            ref_mod.batch_first,
            ref_mod.dropout,
            ref_mod.bidirectional,
            # assuming there is layer 0, which should be OK
            ref_mod.weight_ih_l0_dtype,
        )
        qmod.set_weight_bias(ref_mod.get_quantized_weight_bias_dict())
        return qmod
766
+
767
class RNNCellBase(torch.nn.Module):
    """Base class for dynamic quantized RNN/LSTM/GRU cells.

    Holds the prepacked input-hidden and hidden-hidden weight/bias pairs used
    by the quantized cell ops. ``num_chunks`` is the number of gates the
    weight matrices are stacked for (1 for RNNCell, 3 for GRUCell, 4 for
    LSTMCell).
    """
    # _FLOAT_MODULE = nn.CellRNNBase
    __constants__ = ['input_size', 'hidden_size', 'bias']

    def __init__(self, input_size, hidden_size, bias=True, num_chunks=4, dtype=torch.qint8):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_dtype = dtype
        if bias:
            # NOTE(review): random placeholder biases — real values are
            # installed later via set_weight_bias / from_float.
            self.bias_ih = torch.randn(num_chunks * hidden_size).to(dtype=torch.float)
            self.bias_hh = torch.randn(num_chunks * hidden_size).to(dtype=torch.float)
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)

        # Random placeholder weights, quantized/packed below in the layout
        # the quantized cell ops expect.
        weight_ih = torch.randn(num_chunks * hidden_size, input_size).to(torch.float)
        weight_hh = torch.randn(num_chunks * hidden_size, hidden_size).to(torch.float)
        if dtype == torch.qint8:
            weight_ih = torch.quantize_per_tensor(weight_ih, scale=1, zero_point=0, dtype=torch.qint8)
            weight_hh = torch.quantize_per_tensor(weight_hh, scale=1, zero_point=0, dtype=torch.qint8)

        if dtype == torch.qint8:
            # for each layer, for each direction we need to quantize and pack
            # weights and pack parameters in this order:
            #
            #   w_ih, w_hh
            packed_weight_ih = \
                torch.ops.quantized.linear_prepack(weight_ih, self.bias_ih)
            packed_weight_hh = \
                torch.ops.quantized.linear_prepack(weight_hh, self.bias_hh)
        else:
            # for each layer, for each direction we need to quantize and pack
            # weights and pack parameters in this order:
            #
            #   packed_ih, packed_hh, b_ih, b_hh
            packed_weight_ih = torch.ops.quantized.linear_prepack_fp16(
                weight_ih, self.bias_ih)
            packed_weight_hh = torch.ops.quantized.linear_prepack_fp16(
                weight_hh, self.bias_hh)

        self._packed_weight_ih = packed_weight_ih
        self._packed_weight_hh = packed_weight_hh

    def _get_name(self):
        return 'DynamicQuantizedRNNBase'

    def extra_repr(self):
        s = '{input_size}, {hidden_size}'
        if 'bias' in self.__dict__ and self.bias is not True:
            s += ', bias={bias}'
        if 'nonlinearity' in self.__dict__ and self.nonlinearity != "tanh":
            s += ', nonlinearity={nonlinearity}'
        return s.format(**self.__dict__)

    def check_forward_input(self, input):
        """Raise if *input*'s feature dimension does not match ``input_size``."""
        if input.size(1) != self.input_size:
            raise RuntimeError(
                f"input has inconsistent input_size: got {input.size(1)}, expected {self.input_size}")

    def check_forward_hidden(self, input: Tensor, hx: Tensor, hidden_label: str = '') -> None:
        """Raise if *hx* is inconsistent with *input* batch size or ``hidden_size``."""
        if input.size(0) != hx.size(0):
            raise RuntimeError(
                f"Input batch size {input.size(0)} doesn't match hidden{hidden_label} batch size {hx.size(0)}")

        if hx.size(1) != self.hidden_size:
            raise RuntimeError(
                f"hidden{hidden_label} has inconsistent hidden_size: got {hx.size(1)}, expected {self.hidden_size}")

    @classmethod
    def from_float(cls, mod):
        """Create a dynamic quantized cell from a float LSTMCell/GRUCell/RNNCell."""
        assert type(mod) in {torch.nn.LSTMCell,
                             torch.nn.GRUCell,
                             torch.nn.RNNCell}, 'nn.quantized.dynamic.RNNCellBase.from_float \
                                 only works for nn.LSTMCell, nn.GRUCell and nn.RNNCell'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'

        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer_method = mod.qconfig.weight
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
            # import until we need it.
            from torch.ao.quantization.qconfig import default_dynamic_qconfig
            weight_observer_method = default_dynamic_qconfig.weight

        dtype = weight_observer_method().dtype
        supported_scalar_types = [torch.qint8, torch.float16]
        if dtype not in supported_scalar_types:
            raise RuntimeError(f'Unsupported dtype for dynamic RNN quantization: {dtype}')

        qRNNCellBase: Union[LSTMCell, GRUCell, RNNCell]

        if type(mod) == torch.nn.LSTMCell:
            qRNNCellBase = LSTMCell(mod.input_size, mod.hidden_size, bias=mod.bias, dtype=dtype)
        elif type(mod) == torch.nn.GRUCell:
            qRNNCellBase = GRUCell(mod.input_size, mod.hidden_size, bias=mod.bias, dtype=dtype)
        elif type(mod) == torch.nn.RNNCell:
            qRNNCellBase = RNNCell(mod.input_size, mod.hidden_size, bias=mod.bias, nonlinearity=mod.nonlinearity, dtype=dtype)
        else:
            raise NotImplementedError('Only LSTMCell, GRUCell and RNNCell \
                                      are supported for QuantizedRNN for now')

        # Dynamic quantized cells require biases (packed with the weights).
        assert mod.bias

        def _observe_and_quantize_weight(weight):
            # qint8: run the observer and quantize; fp16: just cast to float.
            if dtype == torch.qint8:
                weight_observer = weight_observer_method()
                weight_observer(weight)
                qweight = _quantize_weight(weight.float(), weight_observer)
                return qweight
            else:
                return weight.float()

        qRNNCellBase._packed_weight_ih = pack_weight_bias(_observe_and_quantize_weight(mod.weight_ih), mod.bias_ih, dtype)
        qRNNCellBase._packed_weight_hh = pack_weight_bias(_observe_and_quantize_weight(mod.weight_hh), mod.bias_hh, dtype)
        return qRNNCellBase

    @classmethod
    def from_reference(cls, ref_mod):
        """Create a dynamic quantized cell from a reference quantized module."""
        # BUGFIX: the message was previously written as two adjacent string
        # literals on separate lines, making the second line a no-op
        # expression statement; parenthesize so the full message reaches the
        # assert.
        assert hasattr(ref_mod, "weight_ih_dtype"), (
            "We are assuming weight_ih "
            "exists in reference module, may need to relax the assumption to support the use case"
        )
        if hasattr(ref_mod, "nonlinearity"):
            qmod = cls(
                ref_mod.input_size,
                ref_mod.hidden_size,
                ref_mod.bias,
                ref_mod.nonlinearity,
                dtype=ref_mod.weight_ih_dtype
            )
        else:
            qmod = cls(
                ref_mod.input_size,
                ref_mod.hidden_size,
                ref_mod.bias,
                dtype=ref_mod.weight_ih_dtype
            )
        weight_bias_dict = {
            "weight": {
                "weight_ih": ref_mod.get_quantized_weight_ih(),
                "weight_hh": ref_mod.get_quantized_weight_hh(),
            },
            "bias": {
                "bias_ih": ref_mod.bias_ih,
                "bias_hh": ref_mod.bias_hh,
            }
        }
        qmod.set_weight_bias(weight_bias_dict)
        return qmod

    def _weight_bias(self):
        # Returns a dict of weights and biases
        weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}}
        w1, b1 = self._packed_weight_ih.__getstate__()[0]
        w2, b2 = self._packed_weight_hh.__getstate__()[0]
        # TODO: these can be simplified to one level? e.g. using weight_ih as key
        # directly
        weight_bias_dict['weight']['weight_ih'] = w1
        weight_bias_dict['weight']['weight_hh'] = w2
        weight_bias_dict['bias']['bias_ih'] = b1
        weight_bias_dict['bias']['bias_hh'] = b2
        return weight_bias_dict

    def get_weight(self):
        return self._weight_bias()['weight']

    def get_bias(self):
        return self._weight_bias()['bias']

    def set_weight_bias(self, weight_bias_dict):
        # TODO: these can be simplified to one level? e.g. using weight_ih as key
        # directly
        self._packed_weight_ih = pack_weight_bias(
            weight_bias_dict["weight"]["weight_ih"],
            weight_bias_dict["bias"]["bias_ih"],
            self.weight_dtype)
        self._packed_weight_hh = pack_weight_bias(
            weight_bias_dict["weight"]["weight_hh"],
            weight_bias_dict["bias"]["bias_hh"],
            self.weight_dtype)

    def _save_to_state_dict(self, destination, prefix, keep_vars):
        super()._save_to_state_dict(destination, prefix, keep_vars)
        # Packed weights are stored alongside the regular state.
        destination[prefix + '_packed_weight_ih'] = self._packed_weight_ih
        destination[prefix + '_packed_weight_hh'] = self._packed_weight_hh

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Pop the packed weights so the parent loader does not see them;
        # strict is deliberately forced to False for the parent call.
        self._packed_weight_ih = state_dict.pop(prefix + '_packed_weight_ih')
        self._packed_weight_hh = state_dict.pop(prefix + '_packed_weight_hh')
        super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
                                      missing_keys, unexpected_keys, error_msgs)
961
+
962
+
963
class RNNCell(RNNCellBase):
    r"""An Elman RNN cell with tanh or ReLU non-linearity.
    A dynamic quantized RNNCell module with floating point tensor as inputs and outputs.
    Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.RNNCell`,
    please see https://pytorch.org/docs/stable/nn.html#torch.nn.RNNCell for documentation.

    Examples::

        >>> # xdoctest: +SKIP
        >>> rnn = nn.RNNCell(10, 20)
        >>> input = torch.randn(6, 3, 10)
        >>> hx = torch.randn(3, 20)
        >>> output = []
        >>> for i in range(6):
        ...     hx = rnn(input[i], hx)
        ...     output.append(hx)
    """
    __constants__ = ['input_size', 'hidden_size', 'bias', 'nonlinearity']

    def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh", dtype=torch.qint8):
        # A single gate -> num_chunks=1.
        super().__init__(input_size, hidden_size, bias, num_chunks=1, dtype=dtype)
        self.nonlinearity = nonlinearity

    def _get_name(self):
        return 'DynamicQuantizedRNNCell'

    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
        """Run one cell step; zero-initializes the hidden state when absent."""
        self.check_forward_input(input)
        if hx is None:
            hx = torch.zeros(input.size(0), self.hidden_size,
                             dtype=input.dtype, device=input.device)
        self.check_forward_hidden(input, hx, '')
        if self.nonlinearity == "tanh":
            return torch.ops.quantized.quantized_rnn_tanh_cell_dynamic(
                input, hx,
                self._packed_weight_ih, self._packed_weight_hh,
                self.bias_ih, self.bias_hh)
        if self.nonlinearity == "relu":
            return torch.ops.quantized.quantized_rnn_relu_cell_dynamic(
                input, hx,
                self._packed_weight_ih, self._packed_weight_hh,
                self.bias_ih, self.bias_hh)
        ret = input  # TODO: remove when jit supports exception flow
        raise RuntimeError(
            f"Unknown nonlinearity: {self.nonlinearity}")

    @classmethod
    def from_float(cls, mod):
        return super().from_float(mod)
1013
+
1014
+
1015
class LSTMCell(RNNCellBase):
    r"""A long short-term memory (LSTM) cell.

    A dynamic quantized LSTMCell module with floating point tensor as inputs and outputs.
    Weights are quantized to 8 bits. We adopt the same interface as `torch.nn.LSTMCell`,
    please see https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell for documentation.

    Examples::

        >>> # xdoctest: +SKIP
        >>> rnn = nn.LSTMCell(10, 20)
        >>> input = torch.randn(6, 3, 10)
        >>> hx = torch.randn(3, 20)
        >>> cx = torch.randn(3, 20)
        >>> output = []
        >>> for i in range(6):
        ...     hx, cx = rnn(input[i], (hx, cx))
        ...     output.append(hx)
    """

    def __init__(self, *args, **kwargs):
        # Four gates (i, f, g, o) -> num_chunks=4.
        super().__init__(*args, num_chunks=4, **kwargs)  # type: ignore[misc]

    def _get_name(self):
        return 'DynamicQuantizedLSTMCell'

    def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
        """Run one cell step; zero-initializes (h, c) when *hx* is absent."""
        self.check_forward_input(input)
        if hx is None:
            zeros = torch.zeros(input.size(0), self.hidden_size,
                                dtype=input.dtype, device=input.device)
            hx = (zeros, zeros)
        self.check_forward_hidden(input, hx[0], '[0]')
        self.check_forward_hidden(input, hx[1], '[1]')
        return torch.ops.quantized.quantized_lstm_cell_dynamic(
            input, hx,
            self._packed_weight_ih, self._packed_weight_hh,
            self.bias_ih, self.bias_hh)

    @classmethod
    def from_float(cls, mod):
        return super().from_float(mod)
1056
+
1057
+
1058
class GRUCell(RNNCellBase):
    r"""A gated recurrent unit (GRU) cell.

    Dynamically quantized variant of :class:`torch.nn.GRUCell`: inputs and
    outputs are floating point tensors while the weights are quantized to
    8 bits.  The interface mirrors `torch.nn.GRUCell`; see
    https://pytorch.org/docs/stable/nn.html#torch.nn.GRUCell for details.

    Examples::

        >>> # xdoctest: +SKIP
        >>> rnn = nn.GRUCell(10, 20)
        >>> input = torch.randn(6, 3, 10)
        >>> hx = torch.randn(3, 20)
        >>> output = []
        >>> for i in range(6):
        ...     hx = rnn(input[i], hx)
        ...     output.append(hx)
    """

    def __init__(self, input_size, hidden_size, bias=True, dtype=torch.qint8):
        # A GRU gate matrix packs 3 chunks (reset/update/new).
        super().__init__(input_size, hidden_size, bias, num_chunks=3, dtype=dtype)

    def _get_name(self):
        """Return the display name used in ``repr`` for this module."""
        return 'DynamicQuantizedGRUCell'

    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
        """Run one GRU step; returns the new hidden state.

        When ``hx`` is ``None`` a zero hidden state matching the input's
        dtype and device is created.
        """
        self.check_forward_input(input)
        if hx is None:
            hx = torch.zeros(
                input.size(0), self.hidden_size,
                dtype=input.dtype, device=input.device)
        self.check_forward_hidden(input, hx, '')
        return torch.ops.quantized.quantized_gru_cell_dynamic(
            input, hx,
            self._packed_weight_ih, self._packed_weight_hh,
            self.bias_ih, self.bias_hh,
        )

    @classmethod
    def from_float(cls, mod):
        """Build a dynamic quantized GRU cell from the float module ``mod``."""
        return super().from_float(mod)