Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +248 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/__init__.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_adapters.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_collections.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_compat.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_functools.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_itertools.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_meta.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_text.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_typing.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/compat/__init__.py +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/__init__.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py311.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py39.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/compat/py311.py +22 -0
- .venv/lib/python3.12/site-packages/importlib_metadata/compat/py39.py +42 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn.h +68 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_adv.h +669 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_adv_v9.h +669 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_backend.h +60 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_backend_v9.h +60 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_cnn.h +693 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_cnn_v9.h +693 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_graph.h +992 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_graph_v9.h +992 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_ops.h +1316 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_ops_v9.h +1316 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_v9.h +68 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_version.h +70 -0
- .venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_version_v9.h +70 -0
- .venv/lib/python3.12/site-packages/nvidia_nccl_cu12-2.27.3.dist-info/licenses/License.txt +39 -0
- .venv/lib/python3.12/site-packages/sklearn/__check_build/__init__.py +54 -0
- .venv/lib/python3.12/site-packages/sklearn/__check_build/_check_build.cpython-312-x86_64-linux-gnu.so +0 -0
- .venv/lib/python3.12/site-packages/sklearn/__check_build/_check_build.pyx +2 -0
- .venv/lib/python3.12/site-packages/sklearn/__check_build/meson.build +6 -0
- .venv/lib/python3.12/site-packages/sklearn/__pycache__/__init__.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/sklearn/__pycache__/_built_with_meson.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/sklearn/__pycache__/_config.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/sklearn/__pycache__/_distributor_init.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/sklearn/__pycache__/base.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/sklearn/__pycache__/exceptions.cpython-312.pyc +0 -0
- .venv/lib/python3.12/site-packages/sklearn/_build_utils/__init__.py +0 -0
- .venv/lib/python3.12/site-packages/sklearn/_build_utils/tempita.py +62 -0
- .venv/lib/python3.12/site-packages/sklearn/_build_utils/version.py +16 -0
- .venv/lib/python3.12/site-packages/sklearn/_loss/__init__.py +33 -0
- .venv/lib/python3.12/site-packages/sklearn/_loss/_loss.pxd +101 -0
- .venv/lib/python3.12/site-packages/sklearn/_loss/_loss.pyx.tp +1505 -0
- .venv/lib/python3.12/site-packages/sklearn/_loss/link.py +282 -0
- .venv/lib/python3.12/site-packages/sklearn/_loss/loss.py +1181 -0
- .venv/lib/python3.12/site-packages/sklearn/_loss/meson.build +23 -0
.gitattributes
CHANGED
|
@@ -809,3 +809,251 @@ illustrious_generated/3e2afaad2b7d.png filter=lfs diff=lfs merge=lfs -text
|
|
| 809 |
illustrious_generated/04d6bfa98264.png filter=lfs diff=lfs merge=lfs -text
|
| 810 |
illustrious_generated/62a8fa0ac7dd.png filter=lfs diff=lfs merge=lfs -text
|
| 811 |
illustrious_generated/d190d03f64a7.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
illustrious_generated/04d6bfa98264.png filter=lfs diff=lfs merge=lfs -text
|
| 810 |
illustrious_generated/62a8fa0ac7dd.png filter=lfs diff=lfs merge=lfs -text
|
| 811 |
illustrious_generated/d190d03f64a7.png filter=lfs diff=lfs merge=lfs -text
|
| 812 |
+
illustrious_generated/f6342e8db68a.png filter=lfs diff=lfs merge=lfs -text
|
| 813 |
+
illustrious_generated/f7ca451e1933.png filter=lfs diff=lfs merge=lfs -text
|
| 814 |
+
illustrious_generated/6b3c44df8332.png filter=lfs diff=lfs merge=lfs -text
|
| 815 |
+
illustrious_generated/ed13e74032fb.png filter=lfs diff=lfs merge=lfs -text
|
| 816 |
+
illustrious_generated/faa1e7049117.png filter=lfs diff=lfs merge=lfs -text
|
| 817 |
+
illustrious_generated/c17212cc7fda.png filter=lfs diff=lfs merge=lfs -text
|
| 818 |
+
illustrious_generated/6c268f463a2b.png filter=lfs diff=lfs merge=lfs -text
|
| 819 |
+
illustrious_generated/a364591ba4c1.png filter=lfs diff=lfs merge=lfs -text
|
| 820 |
+
illustrious_generated/2ea3ba7918b4.png filter=lfs diff=lfs merge=lfs -text
|
| 821 |
+
illustrious_generated/2ffb09f5cbc0.png filter=lfs diff=lfs merge=lfs -text
|
| 822 |
+
illustrious_generated/0d55065059c0.png filter=lfs diff=lfs merge=lfs -text
|
| 823 |
+
illustrious_generated/85e9723ae8cf.png filter=lfs diff=lfs merge=lfs -text
|
| 824 |
+
illustrious_generated/e89ab638d462.png filter=lfs diff=lfs merge=lfs -text
|
| 825 |
+
illustrious_generated/224c2084abb8.png filter=lfs diff=lfs merge=lfs -text
|
| 826 |
+
illustrious_generated/0b77d88bc5f0.png filter=lfs diff=lfs merge=lfs -text
|
| 827 |
+
illustrious_generated/91076903bce5.png filter=lfs diff=lfs merge=lfs -text
|
| 828 |
+
illustrious_generated/7acda55248bc.png filter=lfs diff=lfs merge=lfs -text
|
| 829 |
+
illustrious_generated/ee32c9618a12.png filter=lfs diff=lfs merge=lfs -text
|
| 830 |
+
illustrious_generated/698a4bf05f13.png filter=lfs diff=lfs merge=lfs -text
|
| 831 |
+
illustrious_generated/bf97f1eaffeb.png filter=lfs diff=lfs merge=lfs -text
|
| 832 |
+
illustrious_generated/62daa562132c.png filter=lfs diff=lfs merge=lfs -text
|
| 833 |
+
illustrious_generated/9ee7e057c8a2.png filter=lfs diff=lfs merge=lfs -text
|
| 834 |
+
illustrious_generated/427d956c743b.png filter=lfs diff=lfs merge=lfs -text
|
| 835 |
+
illustrious_generated/06da7f820423.png filter=lfs diff=lfs merge=lfs -text
|
| 836 |
+
illustrious_generated/92bcab0aaba1.png filter=lfs diff=lfs merge=lfs -text
|
| 837 |
+
illustrious_generated/502a84449b45.png filter=lfs diff=lfs merge=lfs -text
|
| 838 |
+
illustrious_generated/d99abaed93ba.png filter=lfs diff=lfs merge=lfs -text
|
| 839 |
+
illustrious_generated/3a12bf82c05e.png filter=lfs diff=lfs merge=lfs -text
|
| 840 |
+
illustrious_generated/433a115b55a3.png filter=lfs diff=lfs merge=lfs -text
|
| 841 |
+
illustrious_generated/574012fe8664.png filter=lfs diff=lfs merge=lfs -text
|
| 842 |
+
illustrious_generated/7d22dc2a6fb2.png filter=lfs diff=lfs merge=lfs -text
|
| 843 |
+
illustrious_generated/4f23c350b644.png filter=lfs diff=lfs merge=lfs -text
|
| 844 |
+
illustrious_generated/e24085ea542f.png filter=lfs diff=lfs merge=lfs -text
|
| 845 |
+
illustrious_generated/3cc7f3366f7a.png filter=lfs diff=lfs merge=lfs -text
|
| 846 |
+
illustrious_generated/5242430c6777.png filter=lfs diff=lfs merge=lfs -text
|
| 847 |
+
illustrious_generated/6fe5f96649a3.png filter=lfs diff=lfs merge=lfs -text
|
| 848 |
+
illustrious_generated/12875eda15eb.png filter=lfs diff=lfs merge=lfs -text
|
| 849 |
+
illustrious_generated/eac29190186c.png filter=lfs diff=lfs merge=lfs -text
|
| 850 |
+
illustrious_generated/c1276a9fc21b.png filter=lfs diff=lfs merge=lfs -text
|
| 851 |
+
illustrious_generated/a891e5d92031.png filter=lfs diff=lfs merge=lfs -text
|
| 852 |
+
illustrious_generated/0367ba694b76.png filter=lfs diff=lfs merge=lfs -text
|
| 853 |
+
illustrious_generated/f84f116882be.png filter=lfs diff=lfs merge=lfs -text
|
| 854 |
+
illustrious_generated/b8e81c1a4bd1.png filter=lfs diff=lfs merge=lfs -text
|
| 855 |
+
illustrious_generated/392a7a129a01.png filter=lfs diff=lfs merge=lfs -text
|
| 856 |
+
illustrious_generated/1506e01a5598.png filter=lfs diff=lfs merge=lfs -text
|
| 857 |
+
illustrious_generated/cbd5827b38ea.png filter=lfs diff=lfs merge=lfs -text
|
| 858 |
+
illustrious_generated/b80b59fe722f.png filter=lfs diff=lfs merge=lfs -text
|
| 859 |
+
illustrious_generated/a2ca03055273.png filter=lfs diff=lfs merge=lfs -text
|
| 860 |
+
illustrious_generated/b58cf17494db.png filter=lfs diff=lfs merge=lfs -text
|
| 861 |
+
illustrious_generated/4c587778617b.png filter=lfs diff=lfs merge=lfs -text
|
| 862 |
+
illustrious_generated/7c5200560049.png filter=lfs diff=lfs merge=lfs -text
|
| 863 |
+
illustrious_generated/b78d0c1f0687.png filter=lfs diff=lfs merge=lfs -text
|
| 864 |
+
illustrious_generated/5c6f22f08540.png filter=lfs diff=lfs merge=lfs -text
|
| 865 |
+
illustrious_generated/9b2b12c21a2b.png filter=lfs diff=lfs merge=lfs -text
|
| 866 |
+
illustrious_generated/ec96a311c2cb.png filter=lfs diff=lfs merge=lfs -text
|
| 867 |
+
illustrious_generated/a28e4715fc8c.png filter=lfs diff=lfs merge=lfs -text
|
| 868 |
+
illustrious_generated/00f5e16a2236.png filter=lfs diff=lfs merge=lfs -text
|
| 869 |
+
illustrious_generated/0ef8c1ed2c6c.png filter=lfs diff=lfs merge=lfs -text
|
| 870 |
+
illustrious_generated/f214facc5681.png filter=lfs diff=lfs merge=lfs -text
|
| 871 |
+
illustrious_generated/f41b4fc2c7d5.png filter=lfs diff=lfs merge=lfs -text
|
| 872 |
+
illustrious_generated/9e9a0ce3d676.png filter=lfs diff=lfs merge=lfs -text
|
| 873 |
+
illustrious_generated/26d2ef2d7d03.png filter=lfs diff=lfs merge=lfs -text
|
| 874 |
+
illustrious_generated/1e774fcc188d.png filter=lfs diff=lfs merge=lfs -text
|
| 875 |
+
illustrious_generated/7eab3f4f0c8e.png filter=lfs diff=lfs merge=lfs -text
|
| 876 |
+
illustrious_generated/f8631de95d70.png filter=lfs diff=lfs merge=lfs -text
|
| 877 |
+
illustrious_generated/8d95e57fcb27.png filter=lfs diff=lfs merge=lfs -text
|
| 878 |
+
illustrious_generated/7ac791baad53.png filter=lfs diff=lfs merge=lfs -text
|
| 879 |
+
illustrious_generated/7b8529c066a0.png filter=lfs diff=lfs merge=lfs -text
|
| 880 |
+
illustrious_generated/7d8509931e4e.png filter=lfs diff=lfs merge=lfs -text
|
| 881 |
+
illustrious_generated/9fafd1175b72.png filter=lfs diff=lfs merge=lfs -text
|
| 882 |
+
illustrious_generated/7023242de1c0.png filter=lfs diff=lfs merge=lfs -text
|
| 883 |
+
illustrious_generated/99d5b088ccd4.png filter=lfs diff=lfs merge=lfs -text
|
| 884 |
+
illustrious_generated/2bac6ab4413e.png filter=lfs diff=lfs merge=lfs -text
|
| 885 |
+
illustrious_generated/00ff6449b55d.png filter=lfs diff=lfs merge=lfs -text
|
| 886 |
+
illustrious_generated/7b900f6e27b1.png filter=lfs diff=lfs merge=lfs -text
|
| 887 |
+
illustrious_generated/69e10254baf5.png filter=lfs diff=lfs merge=lfs -text
|
| 888 |
+
illustrious_generated/93d9e9abc98e.png filter=lfs diff=lfs merge=lfs -text
|
| 889 |
+
illustrious_generated/095dc81d1160.png filter=lfs diff=lfs merge=lfs -text
|
| 890 |
+
illustrious_generated/3315198d28df.png filter=lfs diff=lfs merge=lfs -text
|
| 891 |
+
illustrious_generated/2549abad7eff.png filter=lfs diff=lfs merge=lfs -text
|
| 892 |
+
illustrious_generated/8a90db3476ef.png filter=lfs diff=lfs merge=lfs -text
|
| 893 |
+
illustrious_generated/72473c769552.png filter=lfs diff=lfs merge=lfs -text
|
| 894 |
+
illustrious_generated/bbf3fb096202.png filter=lfs diff=lfs merge=lfs -text
|
| 895 |
+
illustrious_generated/c5e0eb8a2241.png filter=lfs diff=lfs merge=lfs -text
|
| 896 |
+
illustrious_generated/8fa96985fc06.png filter=lfs diff=lfs merge=lfs -text
|
| 897 |
+
illustrious_generated/645e3b996530.png filter=lfs diff=lfs merge=lfs -text
|
| 898 |
+
illustrious_generated/b9fdc64b985c.png filter=lfs diff=lfs merge=lfs -text
|
| 899 |
+
illustrious_generated/fa67e15ca2bf.png filter=lfs diff=lfs merge=lfs -text
|
| 900 |
+
illustrious_generated/9f5c49f2e362.png filter=lfs diff=lfs merge=lfs -text
|
| 901 |
+
illustrious_generated/e8318516b273.png filter=lfs diff=lfs merge=lfs -text
|
| 902 |
+
illustrious_generated/e801a5ce2da6.png filter=lfs diff=lfs merge=lfs -text
|
| 903 |
+
illustrious_generated/cd9145683d1e.png filter=lfs diff=lfs merge=lfs -text
|
| 904 |
+
illustrious_generated/275253c8ad6b.png filter=lfs diff=lfs merge=lfs -text
|
| 905 |
+
illustrious_generated/f2a6e0c5c432.png filter=lfs diff=lfs merge=lfs -text
|
| 906 |
+
illustrious_generated/586dbda7c6ff.png filter=lfs diff=lfs merge=lfs -text
|
| 907 |
+
illustrious_generated/dff506d177c0.png filter=lfs diff=lfs merge=lfs -text
|
| 908 |
+
illustrious_generated/c8846919f3a8.png filter=lfs diff=lfs merge=lfs -text
|
| 909 |
+
illustrious_generated/afbdb8dce1e5.png filter=lfs diff=lfs merge=lfs -text
|
| 910 |
+
illustrious_generated/fd4c46f2141f.png filter=lfs diff=lfs merge=lfs -text
|
| 911 |
+
illustrious_generated/ee36cea22c91.png filter=lfs diff=lfs merge=lfs -text
|
| 912 |
+
illustrious_generated/6ca60a86b836.png filter=lfs diff=lfs merge=lfs -text
|
| 913 |
+
illustrious_generated/11c7f55b2aab.png filter=lfs diff=lfs merge=lfs -text
|
| 914 |
+
illustrious_generated/d684bc0d0627.png filter=lfs diff=lfs merge=lfs -text
|
| 915 |
+
illustrious_generated/4f1602c01d5b.png filter=lfs diff=lfs merge=lfs -text
|
| 916 |
+
illustrious_generated/45c709323899.png filter=lfs diff=lfs merge=lfs -text
|
| 917 |
+
illustrious_generated/d7bc7c5ba632.png filter=lfs diff=lfs merge=lfs -text
|
| 918 |
+
illustrious_generated/0e0acc59ef85.png filter=lfs diff=lfs merge=lfs -text
|
| 919 |
+
illustrious_generated/1c7a7ed6f359.png filter=lfs diff=lfs merge=lfs -text
|
| 920 |
+
illustrious_generated/31cbd66704bb.png filter=lfs diff=lfs merge=lfs -text
|
| 921 |
+
illustrious_generated/dd8a48931525.png filter=lfs diff=lfs merge=lfs -text
|
| 922 |
+
illustrious_generated/7368d4c82b5f.png filter=lfs diff=lfs merge=lfs -text
|
| 923 |
+
illustrious_generated/c7e1a60c0f5d.png filter=lfs diff=lfs merge=lfs -text
|
| 924 |
+
illustrious_generated/be56d67f1e08.png filter=lfs diff=lfs merge=lfs -text
|
| 925 |
+
illustrious_generated/269ee6e9a79c.png filter=lfs diff=lfs merge=lfs -text
|
| 926 |
+
illustrious_generated/2bb0e99b92bc.png filter=lfs diff=lfs merge=lfs -text
|
| 927 |
+
illustrious_generated/afd28993674d.png filter=lfs diff=lfs merge=lfs -text
|
| 928 |
+
illustrious_generated/585afc2017e2.png filter=lfs diff=lfs merge=lfs -text
|
| 929 |
+
illustrious_generated/f9c5bdc8bef5.png filter=lfs diff=lfs merge=lfs -text
|
| 930 |
+
illustrious_generated/8f338d47820a.png filter=lfs diff=lfs merge=lfs -text
|
| 931 |
+
illustrious_generated/e0443895d658.png filter=lfs diff=lfs merge=lfs -text
|
| 932 |
+
illustrious_generated/67ea9c16fed3.png filter=lfs diff=lfs merge=lfs -text
|
| 933 |
+
illustrious_generated/78dfdb4f0521.png filter=lfs diff=lfs merge=lfs -text
|
| 934 |
+
illustrious_generated/fff7c0390e8a.png filter=lfs diff=lfs merge=lfs -text
|
| 935 |
+
illustrious_generated/c63799030196.png filter=lfs diff=lfs merge=lfs -text
|
| 936 |
+
illustrious_generated/fc061ac787c7.png filter=lfs diff=lfs merge=lfs -text
|
| 937 |
+
illustrious_generated/26185801988b.png filter=lfs diff=lfs merge=lfs -text
|
| 938 |
+
illustrious_generated/656abae8d0b6.png filter=lfs diff=lfs merge=lfs -text
|
| 939 |
+
illustrious_generated/5c4a2ea8f842.png filter=lfs diff=lfs merge=lfs -text
|
| 940 |
+
illustrious_generated/2286bf835a6b.png filter=lfs diff=lfs merge=lfs -text
|
| 941 |
+
illustrious_generated/dc7501a6f47f.png filter=lfs diff=lfs merge=lfs -text
|
| 942 |
+
illustrious_generated/38b5363061d5.png filter=lfs diff=lfs merge=lfs -text
|
| 943 |
+
illustrious_generated/451e48977b1a.png filter=lfs diff=lfs merge=lfs -text
|
| 944 |
+
illustrious_generated/f7621703575c.png filter=lfs diff=lfs merge=lfs -text
|
| 945 |
+
illustrious_generated/891dc839571c.png filter=lfs diff=lfs merge=lfs -text
|
| 946 |
+
illustrious_generated/d1e30fd687b5.png filter=lfs diff=lfs merge=lfs -text
|
| 947 |
+
illustrious_generated/d1413371999b.png filter=lfs diff=lfs merge=lfs -text
|
| 948 |
+
illustrious_generated/0ad3307ea09c.png filter=lfs diff=lfs merge=lfs -text
|
| 949 |
+
illustrious_generated/6fba429dafc5.png filter=lfs diff=lfs merge=lfs -text
|
| 950 |
+
illustrious_generated/481f3834876a.png filter=lfs diff=lfs merge=lfs -text
|
| 951 |
+
illustrious_generated/1e54c0c78134.png filter=lfs diff=lfs merge=lfs -text
|
| 952 |
+
illustrious_generated/a564e408f362.png filter=lfs diff=lfs merge=lfs -text
|
| 953 |
+
illustrious_generated/ec6650b62802.png filter=lfs diff=lfs merge=lfs -text
|
| 954 |
+
illustrious_generated/9f447e4cf3d7.png filter=lfs diff=lfs merge=lfs -text
|
| 955 |
+
illustrious_generated/790ece21df10.png filter=lfs diff=lfs merge=lfs -text
|
| 956 |
+
illustrious_generated/75e576f27cb6.png filter=lfs diff=lfs merge=lfs -text
|
| 957 |
+
illustrious_generated/205b715d279f.png filter=lfs diff=lfs merge=lfs -text
|
| 958 |
+
illustrious_generated/060e926dcc0a.png filter=lfs diff=lfs merge=lfs -text
|
| 959 |
+
illustrious_generated/733c86338921.png filter=lfs diff=lfs merge=lfs -text
|
| 960 |
+
illustrious_generated/b9f37572031b.png filter=lfs diff=lfs merge=lfs -text
|
| 961 |
+
illustrious_generated/43eeb1fb403b.png filter=lfs diff=lfs merge=lfs -text
|
| 962 |
+
illustrious_generated/d22ef7243fac.png filter=lfs diff=lfs merge=lfs -text
|
| 963 |
+
illustrious_generated/162e3face5a7.png filter=lfs diff=lfs merge=lfs -text
|
| 964 |
+
illustrious_generated/765bf9d23c7e.png filter=lfs diff=lfs merge=lfs -text
|
| 965 |
+
illustrious_generated/47418c15a58f.png filter=lfs diff=lfs merge=lfs -text
|
| 966 |
+
illustrious_generated/3030bee9df5a.png filter=lfs diff=lfs merge=lfs -text
|
| 967 |
+
illustrious_generated/e4acb93d313c.png filter=lfs diff=lfs merge=lfs -text
|
| 968 |
+
illustrious_generated/08e454ab01c2.png filter=lfs diff=lfs merge=lfs -text
|
| 969 |
+
illustrious_generated/3f43e650c7d7.png filter=lfs diff=lfs merge=lfs -text
|
| 970 |
+
illustrious_generated/085929212457.png filter=lfs diff=lfs merge=lfs -text
|
| 971 |
+
illustrious_generated/91d346543b7c.png filter=lfs diff=lfs merge=lfs -text
|
| 972 |
+
illustrious_generated/891abd7c9fa3.png filter=lfs diff=lfs merge=lfs -text
|
| 973 |
+
illustrious_generated/1927adcb399a.png filter=lfs diff=lfs merge=lfs -text
|
| 974 |
+
illustrious_generated/7e49e6b5a30b.png filter=lfs diff=lfs merge=lfs -text
|
| 975 |
+
illustrious_generated/2cd36314054f.png filter=lfs diff=lfs merge=lfs -text
|
| 976 |
+
illustrious_generated/b569d3590c66.png filter=lfs diff=lfs merge=lfs -text
|
| 977 |
+
illustrious_generated/9e8dc59217e8.png filter=lfs diff=lfs merge=lfs -text
|
| 978 |
+
illustrious_generated/c2c3bea0e9d5.png filter=lfs diff=lfs merge=lfs -text
|
| 979 |
+
illustrious_generated/05972b153525.png filter=lfs diff=lfs merge=lfs -text
|
| 980 |
+
illustrious_generated/c9bf921e364a.png filter=lfs diff=lfs merge=lfs -text
|
| 981 |
+
illustrious_generated/13cdedc9c525.png filter=lfs diff=lfs merge=lfs -text
|
| 982 |
+
illustrious_generated/d8641bfcdd46.png filter=lfs diff=lfs merge=lfs -text
|
| 983 |
+
illustrious_generated/34afbd2725c8.png filter=lfs diff=lfs merge=lfs -text
|
| 984 |
+
illustrious_generated/f0d97f98333f.png filter=lfs diff=lfs merge=lfs -text
|
| 985 |
+
illustrious_generated/76b2de1037cb.png filter=lfs diff=lfs merge=lfs -text
|
| 986 |
+
illustrious_generated/a370eb471cd7.png filter=lfs diff=lfs merge=lfs -text
|
| 987 |
+
illustrious_generated/f5ab32c63fb8.png filter=lfs diff=lfs merge=lfs -text
|
| 988 |
+
illustrious_generated/5718f8172842.png filter=lfs diff=lfs merge=lfs -text
|
| 989 |
+
illustrious_generated/b7f508ecce88.png filter=lfs diff=lfs merge=lfs -text
|
| 990 |
+
illustrious_generated/5f147d77f3ed.png filter=lfs diff=lfs merge=lfs -text
|
| 991 |
+
illustrious_generated/ac9d950baac7.png filter=lfs diff=lfs merge=lfs -text
|
| 992 |
+
illustrious_generated/8b674edb3a4e.png filter=lfs diff=lfs merge=lfs -text
|
| 993 |
+
illustrious_generated/8ad0a744de62.png filter=lfs diff=lfs merge=lfs -text
|
| 994 |
+
illustrious_generated/5b8f74bcc260.png filter=lfs diff=lfs merge=lfs -text
|
| 995 |
+
illustrious_generated/78026f131004.png filter=lfs diff=lfs merge=lfs -text
|
| 996 |
+
illustrious_generated/d305fe437c6f.png filter=lfs diff=lfs merge=lfs -text
|
| 997 |
+
illustrious_generated/7cce990ade4c.png filter=lfs diff=lfs merge=lfs -text
|
| 998 |
+
illustrious_generated/c76729f0f827.png filter=lfs diff=lfs merge=lfs -text
|
| 999 |
+
illustrious_generated/0706f94ebdc3.png filter=lfs diff=lfs merge=lfs -text
|
| 1000 |
+
illustrious_generated/22af9def0424.png filter=lfs diff=lfs merge=lfs -text
|
| 1001 |
+
illustrious_generated/43877698ad33.png filter=lfs diff=lfs merge=lfs -text
|
| 1002 |
+
illustrious_generated/5a0201bebc6d.png filter=lfs diff=lfs merge=lfs -text
|
| 1003 |
+
illustrious_generated/7ad096e9b528.png filter=lfs diff=lfs merge=lfs -text
|
| 1004 |
+
illustrious_generated/46edb49b5dbf.png filter=lfs diff=lfs merge=lfs -text
|
| 1005 |
+
illustrious_generated/bd65b176bfe6.png filter=lfs diff=lfs merge=lfs -text
|
| 1006 |
+
illustrious_generated/073f299a3b06.png filter=lfs diff=lfs merge=lfs -text
|
| 1007 |
+
illustrious_generated/fc885c9be9af.png filter=lfs diff=lfs merge=lfs -text
|
| 1008 |
+
illustrious_generated/bcfc32b88c98.png filter=lfs diff=lfs merge=lfs -text
|
| 1009 |
+
illustrious_generated/e55e6cf94025.png filter=lfs diff=lfs merge=lfs -text
|
| 1010 |
+
illustrious_generated/b4a9600f3647.png filter=lfs diff=lfs merge=lfs -text
|
| 1011 |
+
illustrious_generated/d7ef34bf47ee.png filter=lfs diff=lfs merge=lfs -text
|
| 1012 |
+
illustrious_generated/8cbc6e1dbe62.png filter=lfs diff=lfs merge=lfs -text
|
| 1013 |
+
illustrious_generated/8633a3dff7ea.png filter=lfs diff=lfs merge=lfs -text
|
| 1014 |
+
illustrious_generated/cb335826ba02.png filter=lfs diff=lfs merge=lfs -text
|
| 1015 |
+
illustrious_generated/3048ba382498.png filter=lfs diff=lfs merge=lfs -text
|
| 1016 |
+
illustrious_generated/eca43ddadd85.png filter=lfs diff=lfs merge=lfs -text
|
| 1017 |
+
illustrious_generated/365e7d0f97c2.png filter=lfs diff=lfs merge=lfs -text
|
| 1018 |
+
illustrious_generated/e71b25950c5d.png filter=lfs diff=lfs merge=lfs -text
|
| 1019 |
+
illustrious_generated/59a595c825c8.png filter=lfs diff=lfs merge=lfs -text
|
| 1020 |
+
illustrious_generated/82ee8177ef04.png filter=lfs diff=lfs merge=lfs -text
|
| 1021 |
+
illustrious_generated/36915299353b.png filter=lfs diff=lfs merge=lfs -text
|
| 1022 |
+
illustrious_generated/ca07713b354c.png filter=lfs diff=lfs merge=lfs -text
|
| 1023 |
+
illustrious_generated/fbebd175667e.png filter=lfs diff=lfs merge=lfs -text
|
| 1024 |
+
illustrious_generated/dacfbbcd3fb3.png filter=lfs diff=lfs merge=lfs -text
|
| 1025 |
+
illustrious_generated/8a371dac467c.png filter=lfs diff=lfs merge=lfs -text
|
| 1026 |
+
illustrious_generated/40c498965cbd.png filter=lfs diff=lfs merge=lfs -text
|
| 1027 |
+
illustrious_generated/190beb9306ef.png filter=lfs diff=lfs merge=lfs -text
|
| 1028 |
+
illustrious_generated/bb2041beb345.png filter=lfs diff=lfs merge=lfs -text
|
| 1029 |
+
illustrious_generated/6f1c05af41ca.png filter=lfs diff=lfs merge=lfs -text
|
| 1030 |
+
illustrious_generated/9f741bd68919.png filter=lfs diff=lfs merge=lfs -text
|
| 1031 |
+
illustrious_generated/9bb815cccb98.png filter=lfs diff=lfs merge=lfs -text
|
| 1032 |
+
illustrious_generated/41d42d8f4842.png filter=lfs diff=lfs merge=lfs -text
|
| 1033 |
+
illustrious_generated/13166cbea867.png filter=lfs diff=lfs merge=lfs -text
|
| 1034 |
+
illustrious_generated/e2812aff73e9.png filter=lfs diff=lfs merge=lfs -text
|
| 1035 |
+
illustrious_generated/954594f7f0a6.png filter=lfs diff=lfs merge=lfs -text
|
| 1036 |
+
illustrious_generated/c4b5bff2dbc1.png filter=lfs diff=lfs merge=lfs -text
|
| 1037 |
+
illustrious_generated/980b174e831c.png filter=lfs diff=lfs merge=lfs -text
|
| 1038 |
+
illustrious_generated/ed89a47fd589.png filter=lfs diff=lfs merge=lfs -text
|
| 1039 |
+
illustrious_generated/a8e5c9011eef.png filter=lfs diff=lfs merge=lfs -text
|
| 1040 |
+
illustrious_generated/f1de13ffcad6.png filter=lfs diff=lfs merge=lfs -text
|
| 1041 |
+
illustrious_generated/574fba2c6515.png filter=lfs diff=lfs merge=lfs -text
|
| 1042 |
+
illustrious_generated/591e156ad5fd.png filter=lfs diff=lfs merge=lfs -text
|
| 1043 |
+
illustrious_generated/aef907db00ce.png filter=lfs diff=lfs merge=lfs -text
|
| 1044 |
+
illustrious_generated/3967f8d787ab.png filter=lfs diff=lfs merge=lfs -text
|
| 1045 |
+
illustrious_generated/a1ec0d3b0b0e.png filter=lfs diff=lfs merge=lfs -text
|
| 1046 |
+
illustrious_generated/9da135f5f21e.png filter=lfs diff=lfs merge=lfs -text
|
| 1047 |
+
illustrious_generated/8fd9fbffb954.png filter=lfs diff=lfs merge=lfs -text
|
| 1048 |
+
illustrious_generated/24e5b9fe7d38.png filter=lfs diff=lfs merge=lfs -text
|
| 1049 |
+
illustrious_generated/495f1b55919f.png filter=lfs diff=lfs merge=lfs -text
|
| 1050 |
+
illustrious_generated/19ff2ce2a961.png filter=lfs diff=lfs merge=lfs -text
|
| 1051 |
+
illustrious_generated/e39fecdd2676.png filter=lfs diff=lfs merge=lfs -text
|
| 1052 |
+
illustrious_generated/7663094bacec.png filter=lfs diff=lfs merge=lfs -text
|
| 1053 |
+
illustrious_generated/6d5feb7de870.png filter=lfs diff=lfs merge=lfs -text
|
| 1054 |
+
illustrious_generated/abe90752beb0.png filter=lfs diff=lfs merge=lfs -text
|
| 1055 |
+
illustrious_generated/cae43d7fd0f8.png filter=lfs diff=lfs merge=lfs -text
|
| 1056 |
+
illustrious_generated/3f5c59c8ee7b.png filter=lfs diff=lfs merge=lfs -text
|
| 1057 |
+
illustrious_generated/49712a2e71f1.png filter=lfs diff=lfs merge=lfs -text
|
| 1058 |
+
illustrious_generated/6346f39915f3.png filter=lfs diff=lfs merge=lfs -text
|
| 1059 |
+
illustrious_generated/4c6ea9681419.png filter=lfs diff=lfs merge=lfs -text
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (56.9 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_adapters.cpython-312.pyc
ADDED
|
Binary file (5.93 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_collections.cpython-312.pyc
ADDED
|
Binary file (1.98 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_compat.cpython-312.pyc
ADDED
|
Binary file (2.26 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_functools.cpython-312.pyc
ADDED
|
Binary file (3.49 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_itertools.cpython-312.pyc
ADDED
|
Binary file (6.49 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_meta.cpython-312.pyc
ADDED
|
Binary file (3.58 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_text.cpython-312.pyc
ADDED
|
Binary file (3.89 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/__pycache__/_typing.cpython-312.pyc
ADDED
|
Binary file (399 Bytes). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/compat/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (202 Bytes). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py311.cpython-312.pyc
ADDED
|
Binary file (1.27 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py39.cpython-312.pyc
ADDED
|
Binary file (1.71 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/importlib_metadata/compat/py311.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pathlib
|
| 3 |
+
import sys
|
| 4 |
+
import types
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def wrap(path): # pragma: no cover
|
| 8 |
+
"""
|
| 9 |
+
Workaround for https://github.com/python/cpython/issues/84538
|
| 10 |
+
to add backward compatibility for walk_up=True.
|
| 11 |
+
An example affected package is dask-labextension, which uses
|
| 12 |
+
jupyter-packaging to install JupyterLab javascript files outside
|
| 13 |
+
of site-packages.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def relative_to(root, *, walk_up=False):
|
| 17 |
+
return pathlib.Path(os.path.relpath(path, root))
|
| 18 |
+
|
| 19 |
+
return types.SimpleNamespace(relative_to=relative_to)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
relative_fix = wrap if sys.version_info < (3, 12) else lambda x: x
|
.venv/lib/python3.12/site-packages/importlib_metadata/compat/py39.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Compatibility layer with Python 3.8/3.9
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
from typing import TYPE_CHECKING, Any
|
| 8 |
+
|
| 9 |
+
if TYPE_CHECKING: # pragma: no cover
|
| 10 |
+
# Prevent circular imports on runtime.
|
| 11 |
+
from .. import Distribution, EntryPoint
|
| 12 |
+
else:
|
| 13 |
+
Distribution = EntryPoint = Any
|
| 14 |
+
|
| 15 |
+
from .._typing import md_none
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def normalized_name(dist: Distribution) -> str | None:
|
| 19 |
+
"""
|
| 20 |
+
Honor name normalization for distributions that don't provide ``_normalized_name``.
|
| 21 |
+
"""
|
| 22 |
+
try:
|
| 23 |
+
return dist._normalized_name
|
| 24 |
+
except AttributeError:
|
| 25 |
+
from .. import Prepared # -> delay to prevent circular imports.
|
| 26 |
+
|
| 27 |
+
return Prepared.normalize(
|
| 28 |
+
getattr(dist, "name", None) or md_none(dist.metadata)['Name']
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def ep_matches(ep: EntryPoint, **params) -> bool:
|
| 33 |
+
"""
|
| 34 |
+
Workaround for ``EntryPoint`` objects without the ``matches`` method.
|
| 35 |
+
"""
|
| 36 |
+
try:
|
| 37 |
+
return ep.matches(**params)
|
| 38 |
+
except AttributeError:
|
| 39 |
+
from .. import EntryPoint # -> delay to prevent circular imports.
|
| 40 |
+
|
| 41 |
+
# Reconstruct the EntryPoint object to make sure it is compatible.
|
| 42 |
+
return EntryPoint(ep.name, ep.value, ep.group).matches(**params)
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn.h
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/* cudnn : Neural Networks Library */
|
| 51 |
+
|
| 52 |
+
#if !defined(CUDNN_H_)
|
| 53 |
+
#define CUDNN_H_
|
| 54 |
+
#if defined(__cplusplus)
|
| 55 |
+
extern "C" {
|
| 56 |
+
#endif
|
| 57 |
+
|
| 58 |
+
#include <cuda_runtime_api.h>
|
| 59 |
+
#include "cudnn_version.h"
|
| 60 |
+
#include "cudnn_graph.h"
|
| 61 |
+
#include "cudnn_ops.h"
|
| 62 |
+
#include "cudnn_adv.h"
|
| 63 |
+
#include "cudnn_cnn.h"
|
| 64 |
+
|
| 65 |
+
#if defined(__cplusplus)
|
| 66 |
+
}
|
| 67 |
+
#endif
|
| 68 |
+
#endif /* CUDNN_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_adv.h
ADDED
|
@@ -0,0 +1,669 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/* cudnn_adv : cuDNN's advanced and experimental features.
|
| 51 |
+
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_ADV_H_)
|
| 55 |
+
#define CUDNN_ADV_H_
|
| 56 |
+
|
| 57 |
+
#include <stdint.h>
|
| 58 |
+
|
| 59 |
+
#include "cudnn_version.h"
|
| 60 |
+
#include "cudnn_ops.h"
|
| 61 |
+
|
| 62 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 63 |
+
#define CUDNN_ADV_MAJOR 9
|
| 64 |
+
#define CUDNN_ADV_MINOR 10
|
| 65 |
+
#define CUDNN_ADV_PATCH 2
|
| 66 |
+
|
| 67 |
+
#if (CUDNN_ADV_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_MINOR != CUDNN_MINOR) || (CUDNN_ADV_PATCH != CUDNN_PATCHLEVEL)
|
| 68 |
+
#error Version mismatch in cuDNN ADV INFER!!!
|
| 69 |
+
#endif
|
| 70 |
+
|
| 71 |
+
#if defined(__cplusplus)
|
| 72 |
+
extern "C" {
|
| 73 |
+
#endif
|
| 74 |
+
|
| 75 |
+
/* BASIC RNN API */
|
| 76 |
+
|
| 77 |
+
typedef enum {
|
| 78 |
+
CUDNN_RNN_ALGO_STANDARD = 0,
|
| 79 |
+
CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
|
| 80 |
+
CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
|
| 81 |
+
CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
|
| 82 |
+
CUDNN_RNN_ALGO_COUNT = 4,
|
| 83 |
+
} cudnnRNNAlgo_t;
|
| 84 |
+
|
| 85 |
+
typedef enum {
|
| 86 |
+
CUDNN_FWD_MODE_INFERENCE = 0,
|
| 87 |
+
CUDNN_FWD_MODE_TRAINING = 1,
|
| 88 |
+
} cudnnForwardMode_t;
|
| 89 |
+
|
| 90 |
+
typedef enum {
|
| 91 |
+
CUDNN_RNN_RELU = 0, /* basic RNN cell type with ReLu activation */
|
| 92 |
+
CUDNN_RNN_TANH = 1, /* basic RNN cell type with tanh activation */
|
| 93 |
+
CUDNN_LSTM = 2, /* LSTM with optional recurrent projection and clipping */
|
| 94 |
+
CUDNN_GRU = 3, /* Using h' = tanh(r * Uh(t-1) + Wx) and h = (1 - z) * h' + z * h(t-1); */
|
| 95 |
+
} cudnnRNNMode_t;
|
| 96 |
+
|
| 97 |
+
typedef enum {
|
| 98 |
+
CUDNN_RNN_NO_BIAS = 0, /* rnn cell formulas do not use biases */
|
| 99 |
+
CUDNN_RNN_SINGLE_INP_BIAS = 1, /* rnn cell formulas use one input bias in input GEMM */
|
| 100 |
+
CUDNN_RNN_DOUBLE_BIAS = 2, /* default, rnn cell formulas use two bias vectors */
|
| 101 |
+
CUDNN_RNN_SINGLE_REC_BIAS = 3 /* rnn cell formulas use one recurrent bias in recurrent GEMM */
|
| 102 |
+
} cudnnRNNBiasMode_t;
|
| 103 |
+
|
| 104 |
+
typedef enum {
|
| 105 |
+
CUDNN_UNIDIRECTIONAL = 0, /* single direction network */
|
| 106 |
+
CUDNN_BIDIRECTIONAL = 1, /* output concatination at each layer */
|
| 107 |
+
} cudnnDirectionMode_t;
|
| 108 |
+
|
| 109 |
+
typedef enum {
|
| 110 |
+
CUDNN_LINEAR_INPUT = 0, /* adjustable weight matrix in first layer input GEMM */
|
| 111 |
+
CUDNN_SKIP_INPUT = 1, /* fixed identity matrix in the first layer input GEMM */
|
| 112 |
+
} cudnnRNNInputMode_t;
|
| 113 |
+
|
| 114 |
+
typedef enum {
|
| 115 |
+
CUDNN_RNN_CLIP_NONE = 0, /* disables LSTM cell clipping */
|
| 116 |
+
CUDNN_RNN_CLIP_MINMAX = 1, /* enables LSTM cell clipping */
|
| 117 |
+
} cudnnRNNClipMode_t;
|
| 118 |
+
|
| 119 |
+
typedef enum {
|
| 120 |
+
CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED = 0, /* padded, outer stride from one time-step to the next */
|
| 121 |
+
CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED = 1, /* sequence length sorted and packed as in basic RNN api */
|
| 122 |
+
CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2, /* padded, outer stride from one batch to the next */
|
| 123 |
+
} cudnnRNNDataLayout_t;
|
| 124 |
+
|
| 125 |
+
/* For auxFlags in cudnnSetRNNDescriptor_v8() */
|
| 126 |
+
#define CUDNN_RNN_PADDED_IO_DISABLED 0
|
| 127 |
+
#define CUDNN_RNN_PADDED_IO_ENABLED (1U << 0)
|
| 128 |
+
|
| 129 |
+
struct cudnnRNNStruct;
|
| 130 |
+
typedef struct cudnnRNNStruct *cudnnRNNDescriptor_t;
|
| 131 |
+
|
| 132 |
+
struct cudnnRNNDataStruct;
|
| 133 |
+
typedef struct cudnnRNNDataStruct *cudnnRNNDataDescriptor_t;
|
| 134 |
+
|
| 135 |
+
cudnnStatus_t CUDNNWINAPI
|
| 136 |
+
cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc);
|
| 137 |
+
|
| 138 |
+
cudnnStatus_t CUDNNWINAPI
|
| 139 |
+
cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc);
|
| 140 |
+
|
| 141 |
+
/*
|
| 142 |
+
* mathPrec in cudnnSetRNNDescriptor_v8() specifies compute precision.
|
| 143 |
+
* Compute precision is further modified by mathType that sets the
|
| 144 |
+
* preferred option for using NVIDIA Tensor Cores. dataType specify
|
| 145 |
+
* input/output data type and weight/bias type.
|
| 146 |
+
*/
|
| 147 |
+
|
| 148 |
+
cudnnStatus_t CUDNNWINAPI
|
| 149 |
+
cudnnSetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 150 |
+
cudnnRNNAlgo_t algo,
|
| 151 |
+
cudnnRNNMode_t cellMode,
|
| 152 |
+
cudnnRNNBiasMode_t biasMode,
|
| 153 |
+
cudnnDirectionMode_t dirMode,
|
| 154 |
+
cudnnRNNInputMode_t inputMode,
|
| 155 |
+
cudnnDataType_t dataType,
|
| 156 |
+
cudnnDataType_t mathPrec,
|
| 157 |
+
cudnnMathType_t mathType,
|
| 158 |
+
int32_t inputSize,
|
| 159 |
+
int32_t hiddenSize,
|
| 160 |
+
int32_t projSize,
|
| 161 |
+
int32_t numLayers,
|
| 162 |
+
cudnnDropoutDescriptor_t dropoutDesc,
|
| 163 |
+
uint32_t auxFlags);
|
| 164 |
+
|
| 165 |
+
cudnnStatus_t CUDNNWINAPI
|
| 166 |
+
cudnnGetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 167 |
+
cudnnRNNAlgo_t *algo,
|
| 168 |
+
cudnnRNNMode_t *cellMode,
|
| 169 |
+
cudnnRNNBiasMode_t *biasMode,
|
| 170 |
+
cudnnDirectionMode_t *dirMode,
|
| 171 |
+
cudnnRNNInputMode_t *inputMode,
|
| 172 |
+
cudnnDataType_t *dataType,
|
| 173 |
+
cudnnDataType_t *mathPrec,
|
| 174 |
+
cudnnMathType_t *mathType,
|
| 175 |
+
int32_t *inputSize,
|
| 176 |
+
int32_t *hiddenSize,
|
| 177 |
+
int32_t *projSize,
|
| 178 |
+
int32_t *numLayers,
|
| 179 |
+
cudnnDropoutDescriptor_t *dropoutDesc,
|
| 180 |
+
uint32_t *auxFlags);
|
| 181 |
+
|
| 182 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 183 |
+
cudnnRNNSetClip_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 184 |
+
cudnnRNNClipMode_t clipMode,
|
| 185 |
+
cudnnNanPropagation_t clipNanOpt,
|
| 186 |
+
double lclip,
|
| 187 |
+
double rclip);
|
| 188 |
+
|
| 189 |
+
cudnnStatus_t CUDNNWINAPI
|
| 190 |
+
cudnnRNNSetClip_v9(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, double lclip, double rclip);
|
| 191 |
+
|
| 192 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 193 |
+
cudnnRNNGetClip_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 194 |
+
cudnnRNNClipMode_t *clipMode,
|
| 195 |
+
cudnnNanPropagation_t *clipNanOpt,
|
| 196 |
+
double *lclip,
|
| 197 |
+
double *rclip);
|
| 198 |
+
|
| 199 |
+
cudnnStatus_t CUDNNWINAPI
|
| 200 |
+
cudnnRNNGetClip_v9(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t *clipMode, double *lclip, double *rclip);
|
| 201 |
+
|
| 202 |
+
cudnnStatus_t CUDNNWINAPI
|
| 203 |
+
cudnnBuildRNNDynamic(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int miniBatch);
|
| 204 |
+
|
| 205 |
+
cudnnStatus_t CUDNNWINAPI
|
| 206 |
+
cudnnGetRNNTempSpaceSizes(cudnnHandle_t handle,
|
| 207 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 208 |
+
cudnnForwardMode_t fwdMode,
|
| 209 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 210 |
+
size_t *workSpaceSize,
|
| 211 |
+
size_t *reserveSpaceSize);
|
| 212 |
+
|
| 213 |
+
cudnnStatus_t CUDNNWINAPI
|
| 214 |
+
cudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, size_t *weightSpaceSize);
|
| 215 |
+
|
| 216 |
+
cudnnStatus_t CUDNNWINAPI
|
| 217 |
+
cudnnGetRNNWeightParams(cudnnHandle_t handle,
|
| 218 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 219 |
+
int32_t pseudoLayer,
|
| 220 |
+
size_t weightSpaceSize,
|
| 221 |
+
const void *weightSpace,
|
| 222 |
+
int32_t linLayerID,
|
| 223 |
+
cudnnTensorDescriptor_t mDesc,
|
| 224 |
+
void **mAddr,
|
| 225 |
+
cudnnTensorDescriptor_t bDesc,
|
| 226 |
+
void **bAddr);
|
| 227 |
+
|
| 228 |
+
cudnnStatus_t CUDNNWINAPI
|
| 229 |
+
cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc);
|
| 230 |
+
|
| 231 |
+
cudnnStatus_t CUDNNWINAPI
|
| 232 |
+
cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc);
|
| 233 |
+
|
| 234 |
+
cudnnStatus_t CUDNNWINAPI
|
| 235 |
+
cudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
|
| 236 |
+
cudnnDataType_t dataType,
|
| 237 |
+
cudnnRNNDataLayout_t layout,
|
| 238 |
+
int maxSeqLength,
|
| 239 |
+
int batchSize,
|
| 240 |
+
int vectorSize,
|
| 241 |
+
const int seqLengthArray[], /* length of each sequence in the batch */
|
| 242 |
+
void *paddingFill); /* symbol for filling padding position in output */
|
| 243 |
+
|
| 244 |
+
cudnnStatus_t CUDNNWINAPI
|
| 245 |
+
cudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
|
| 246 |
+
cudnnDataType_t *dataType,
|
| 247 |
+
cudnnRNNDataLayout_t *layout,
|
| 248 |
+
int *maxSeqLength,
|
| 249 |
+
int *batchSize,
|
| 250 |
+
int *vectorSize,
|
| 251 |
+
int arrayLengthRequested,
|
| 252 |
+
int seqLengthArray[],
|
| 253 |
+
void *paddingFill);
|
| 254 |
+
|
| 255 |
+
cudnnStatus_t CUDNNWINAPI
|
| 256 |
+
cudnnRNNForward(cudnnHandle_t handle,
|
| 257 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 258 |
+
cudnnForwardMode_t fwdMode,
|
| 259 |
+
const int32_t devSeqLengths[],
|
| 260 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 261 |
+
const void *x,
|
| 262 |
+
cudnnRNNDataDescriptor_t yDesc,
|
| 263 |
+
void *y,
|
| 264 |
+
cudnnTensorDescriptor_t hDesc,
|
| 265 |
+
const void *hx,
|
| 266 |
+
void *hy,
|
| 267 |
+
cudnnTensorDescriptor_t cDesc,
|
| 268 |
+
const void *cx,
|
| 269 |
+
void *cy,
|
| 270 |
+
size_t weightSpaceSize,
|
| 271 |
+
const void *weightSpace,
|
| 272 |
+
size_t workSpaceSize,
|
| 273 |
+
void *workSpace,
|
| 274 |
+
size_t reserveSpaceSize,
|
| 275 |
+
void *reserveSpace);
|
| 276 |
+
|
| 277 |
+
/* Sequence data descriptor */
|
| 278 |
+
|
| 279 |
+
typedef enum {
|
| 280 |
+
CUDNN_SEQDATA_TIME_DIM = 0, /* index in time */
|
| 281 |
+
CUDNN_SEQDATA_BATCH_DIM = 1, /* index in batch */
|
| 282 |
+
CUDNN_SEQDATA_BEAM_DIM = 2, /* index in beam */
|
| 283 |
+
CUDNN_SEQDATA_VECT_DIM = 3 /* index in vector */
|
| 284 |
+
} cudnnSeqDataAxis_t;
|
| 285 |
+
|
| 286 |
+
struct cudnnSeqDataStruct;
|
| 287 |
+
typedef struct cudnnSeqDataStruct *cudnnSeqDataDescriptor_t CUDNN_DEPRECATED;
|
| 288 |
+
|
| 289 |
+
#define CUDNN_SEQDATA_DIM_COUNT 4 /* dimension count */
|
| 290 |
+
|
| 291 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 292 |
+
cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc);
|
| 293 |
+
|
| 294 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 295 |
+
cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc);
|
| 296 |
+
|
| 297 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 298 |
+
cudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc,
|
| 299 |
+
cudnnDataType_t dataType,
|
| 300 |
+
int nbDims,
|
| 301 |
+
const int dimA[],
|
| 302 |
+
const cudnnSeqDataAxis_t axes[],
|
| 303 |
+
size_t seqLengthArraySize,
|
| 304 |
+
const int seqLengthArray[],
|
| 305 |
+
void *paddingFill);
|
| 306 |
+
|
| 307 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 308 |
+
cudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc,
|
| 309 |
+
cudnnDataType_t *dataType,
|
| 310 |
+
int *nbDims,
|
| 311 |
+
int nbDimsRequested,
|
| 312 |
+
int dimA[],
|
| 313 |
+
cudnnSeqDataAxis_t axes[],
|
| 314 |
+
size_t *seqLengthArraySize,
|
| 315 |
+
size_t seqLengthSizeRequested,
|
| 316 |
+
int seqLengthArray[],
|
| 317 |
+
void *paddingFill);
|
| 318 |
+
|
| 319 |
+
/* Multihead Attention */
|
| 320 |
+
|
| 321 |
+
/*
|
| 322 |
+
* Multi-head attention options passed via 'attnMode' in cudnnSetAttnDescriptor().
|
| 323 |
+
* Use the bitwise OR operator to combine several settings listed below. Additional
|
| 324 |
+
* minor options can be added here w/o changing or introducing new API functions.
|
| 325 |
+
*/
|
| 326 |
+
#define CUDNN_ATTN_QUERYMAP_ALL_TO_ONE 0 /* multiple Q-s map to a single (K,V) set when beam size > 1 */
|
| 327 |
+
#define CUDNN_ATTN_QUERYMAP_ONE_TO_ONE (1U << 0) /* multiple Q-s map to multiple (K,V) sets when beam size > 1 */
|
| 328 |
+
#define CUDNN_ATTN_DISABLE_PROJ_BIASES 0 /* no biases in attention input and output projections */
|
| 329 |
+
#define CUDNN_ATTN_ENABLE_PROJ_BIASES (1U << 1) /* use biases in attention input and output projections */
|
| 330 |
+
|
| 331 |
+
struct cudnnAttnStruct;
|
| 332 |
+
typedef struct cudnnAttnStruct *cudnnAttnDescriptor_t CUDNN_DEPRECATED;
|
| 333 |
+
|
| 334 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc);
|
| 336 |
+
|
| 337 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 338 |
+
cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc);
|
| 339 |
+
|
| 340 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 341 |
+
cudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
|
| 342 |
+
unsigned attnMode,
|
| 343 |
+
int nHeads,
|
| 344 |
+
double smScaler,
|
| 345 |
+
cudnnDataType_t dataType,
|
| 346 |
+
cudnnDataType_t computePrec,
|
| 347 |
+
cudnnMathType_t mathType,
|
| 348 |
+
cudnnDropoutDescriptor_t attnDropoutDesc,
|
| 349 |
+
cudnnDropoutDescriptor_t postDropoutDesc,
|
| 350 |
+
int qSize,
|
| 351 |
+
int kSize,
|
| 352 |
+
int vSize,
|
| 353 |
+
int qProjSize,
|
| 354 |
+
int kProjSize,
|
| 355 |
+
int vProjSize,
|
| 356 |
+
int oProjSize,
|
| 357 |
+
int qoMaxSeqLength,
|
| 358 |
+
int kvMaxSeqLength,
|
| 359 |
+
int maxBatchSize,
|
| 360 |
+
int maxBeamSize);
|
| 361 |
+
|
| 362 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 363 |
+
cudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
|
| 364 |
+
unsigned *attnMode,
|
| 365 |
+
int *nHeads,
|
| 366 |
+
double *smScaler,
|
| 367 |
+
cudnnDataType_t *dataType,
|
| 368 |
+
cudnnDataType_t *computePrec,
|
| 369 |
+
cudnnMathType_t *mathType,
|
| 370 |
+
cudnnDropoutDescriptor_t *attnDropoutDesc,
|
| 371 |
+
cudnnDropoutDescriptor_t *postDropoutDesc,
|
| 372 |
+
int *qSize,
|
| 373 |
+
int *kSize,
|
| 374 |
+
int *vSize,
|
| 375 |
+
int *qProjSize,
|
| 376 |
+
int *kProjSize,
|
| 377 |
+
int *vProjSize,
|
| 378 |
+
int *oProjSize,
|
| 379 |
+
int *qoMaxSeqLength,
|
| 380 |
+
int *kvMaxSeqLength,
|
| 381 |
+
int *maxBatchSize,
|
| 382 |
+
int *maxBeamSize);
|
| 383 |
+
|
| 384 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 385 |
+
cudnnGetMultiHeadAttnBuffers(cudnnHandle_t handle,
|
| 386 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 387 |
+
size_t *weightSizeInBytes,
|
| 388 |
+
size_t *workSpaceSizeInBytes,
|
| 389 |
+
size_t *reserveSpaceSizeInBytes);
|
| 390 |
+
|
| 391 |
+
typedef enum {
|
| 392 |
+
CUDNN_MH_ATTN_Q_WEIGHTS = 0, /* input projection weights for 'queries' */
|
| 393 |
+
CUDNN_MH_ATTN_K_WEIGHTS = 1, /* input projection weights for 'keys' */
|
| 394 |
+
CUDNN_MH_ATTN_V_WEIGHTS = 2, /* input projection weights for 'values' */
|
| 395 |
+
CUDNN_MH_ATTN_O_WEIGHTS = 3, /* output projection weights */
|
| 396 |
+
CUDNN_MH_ATTN_Q_BIASES = 4, /* input projection bias tensor for 'queries' */
|
| 397 |
+
CUDNN_MH_ATTN_K_BIASES = 5, /* input projection bias for 'keys' */
|
| 398 |
+
CUDNN_MH_ATTN_V_BIASES = 6, /* input projection bias for 'values' */
|
| 399 |
+
CUDNN_MH_ATTN_O_BIASES = 7, /* output projection biases */
|
| 400 |
+
} cudnnMultiHeadAttnWeightKind_t;
|
| 401 |
+
|
| 402 |
+
#define CUDNN_ATTN_WKIND_COUNT 8 /* Number of attention weight/bias tensors */
|
| 403 |
+
|
| 404 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 405 |
+
cudnnGetMultiHeadAttnWeights(cudnnHandle_t handle,
|
| 406 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 407 |
+
cudnnMultiHeadAttnWeightKind_t wKind,
|
| 408 |
+
size_t weightSizeInBytes,
|
| 409 |
+
const void *weights,
|
| 410 |
+
cudnnTensorDescriptor_t wDesc,
|
| 411 |
+
void **wAddr);
|
| 412 |
+
|
| 413 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 414 |
+
cudnnMultiHeadAttnForward(cudnnHandle_t handle,
|
| 415 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 416 |
+
int currIdx,
|
| 417 |
+
const int loWinIdx[],
|
| 418 |
+
const int hiWinIdx[],
|
| 419 |
+
const int devSeqLengthsQO[],
|
| 420 |
+
const int devSeqLengthsKV[],
|
| 421 |
+
const cudnnSeqDataDescriptor_t qDesc,
|
| 422 |
+
const void *queries,
|
| 423 |
+
const void *residuals,
|
| 424 |
+
const cudnnSeqDataDescriptor_t kDesc,
|
| 425 |
+
const void *keys,
|
| 426 |
+
const cudnnSeqDataDescriptor_t vDesc,
|
| 427 |
+
const void *values,
|
| 428 |
+
const cudnnSeqDataDescriptor_t oDesc,
|
| 429 |
+
void *out,
|
| 430 |
+
size_t weightSizeInBytes,
|
| 431 |
+
const void *weights,
|
| 432 |
+
size_t workSpaceSizeInBytes,
|
| 433 |
+
void *workSpace,
|
| 434 |
+
size_t reserveSpaceSizeInBytes,
|
| 435 |
+
void *reserveSpace);
|
| 436 |
+
|
| 437 |
+
/*
|
| 438 |
+
* \brief Cross-library version checker.
|
| 439 |
+
* This function is implemented differently in each sub-library. Each sublib
|
| 440 |
+
* checks whether its own version matches that of its dependencies.
|
| 441 |
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
| 442 |
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
| 443 |
+
*/
|
| 444 |
+
cudnnStatus_t CUDNNWINAPI
|
| 445 |
+
cudnnAdvVersionCheck(void);
|
| 446 |
+
|
| 447 |
+
typedef enum {
|
| 448 |
+
CUDNN_WGRAD_MODE_ADD = 0, /* add partial gradients to wgrad output buffers */
|
| 449 |
+
CUDNN_WGRAD_MODE_SET = 1, /* write partial gradients to wgrad output buffers */
|
| 450 |
+
} cudnnWgradMode_t;
|
| 451 |
+
|
| 452 |
+
cudnnStatus_t CUDNNWINAPI
|
| 453 |
+
cudnnRNNBackwardData_v8(cudnnHandle_t handle,
|
| 454 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 455 |
+
const int32_t devSeqLengths[],
|
| 456 |
+
cudnnRNNDataDescriptor_t yDesc,
|
| 457 |
+
const void *y,
|
| 458 |
+
const void *dy,
|
| 459 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 460 |
+
void *dx,
|
| 461 |
+
cudnnTensorDescriptor_t hDesc,
|
| 462 |
+
const void *hx,
|
| 463 |
+
const void *dhy,
|
| 464 |
+
void *dhx,
|
| 465 |
+
cudnnTensorDescriptor_t cDesc,
|
| 466 |
+
const void *cx,
|
| 467 |
+
const void *dcy,
|
| 468 |
+
void *dcx,
|
| 469 |
+
size_t weightSpaceSize,
|
| 470 |
+
const void *weightSpace,
|
| 471 |
+
size_t workSpaceSize,
|
| 472 |
+
void *workSpace,
|
| 473 |
+
size_t reserveSpaceSize,
|
| 474 |
+
void *reserveSpace);
|
| 475 |
+
|
| 476 |
+
cudnnStatus_t CUDNNWINAPI
|
| 477 |
+
cudnnRNNBackwardWeights_v8(cudnnHandle_t handle,
|
| 478 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 479 |
+
cudnnWgradMode_t addGrad,
|
| 480 |
+
const int32_t devSeqLengths[],
|
| 481 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 482 |
+
const void *x,
|
| 483 |
+
cudnnTensorDescriptor_t hDesc,
|
| 484 |
+
const void *hx,
|
| 485 |
+
cudnnRNNDataDescriptor_t yDesc,
|
| 486 |
+
const void *y,
|
| 487 |
+
size_t weightSpaceSize,
|
| 488 |
+
void *dweightSpace,
|
| 489 |
+
size_t workSpaceSize,
|
| 490 |
+
void *workSpace,
|
| 491 |
+
size_t reserveSpaceSize,
|
| 492 |
+
void *reserveSpace);
|
| 493 |
+
|
| 494 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 495 |
+
cudnnMultiHeadAttnBackwardData(cudnnHandle_t handle,
|
| 496 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 497 |
+
const int loWinIdx[],
|
| 498 |
+
const int hiWinIdx[],
|
| 499 |
+
const int devSeqLengthsDQDO[],
|
| 500 |
+
const int devSeqLengthsDKDV[],
|
| 501 |
+
const cudnnSeqDataDescriptor_t doDesc,
|
| 502 |
+
const void *dout,
|
| 503 |
+
const cudnnSeqDataDescriptor_t dqDesc,
|
| 504 |
+
void *dqueries,
|
| 505 |
+
const void *queries,
|
| 506 |
+
const cudnnSeqDataDescriptor_t dkDesc,
|
| 507 |
+
void *dkeys,
|
| 508 |
+
const void *keys,
|
| 509 |
+
const cudnnSeqDataDescriptor_t dvDesc,
|
| 510 |
+
void *dvalues,
|
| 511 |
+
const void *values,
|
| 512 |
+
size_t weightSizeInBytes,
|
| 513 |
+
const void *weights,
|
| 514 |
+
size_t workSpaceSizeInBytes,
|
| 515 |
+
void *workSpace,
|
| 516 |
+
size_t reserveSpaceSizeInBytes,
|
| 517 |
+
void *reserveSpace);
|
| 518 |
+
|
| 519 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 520 |
+
cudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle,
|
| 521 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 522 |
+
cudnnWgradMode_t addGrad,
|
| 523 |
+
const cudnnSeqDataDescriptor_t qDesc,
|
| 524 |
+
const void *queries,
|
| 525 |
+
const cudnnSeqDataDescriptor_t kDesc,
|
| 526 |
+
const void *keys,
|
| 527 |
+
const cudnnSeqDataDescriptor_t vDesc,
|
| 528 |
+
const void *values,
|
| 529 |
+
const cudnnSeqDataDescriptor_t doDesc,
|
| 530 |
+
const void *dout,
|
| 531 |
+
size_t weightSizeInBytes,
|
| 532 |
+
const void *weights,
|
| 533 |
+
void *dweights,
|
| 534 |
+
size_t workSpaceSizeInBytes,
|
| 535 |
+
void *workSpace,
|
| 536 |
+
size_t reserveSpaceSizeInBytes,
|
| 537 |
+
void *reserveSpace);
|
| 538 |
+
|
| 539 |
+
/*
|
| 540 |
+
* CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions
|
| 541 |
+
*/
|
| 542 |
+
/* Input normalization mode for loss function */
|
| 543 |
+
typedef enum {
|
| 544 |
+
CUDNN_LOSS_NORMALIZATION_NONE = 0,
|
| 545 |
+
CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1,
|
| 546 |
+
} cudnnLossNormalizationMode_t;
|
| 547 |
+
|
| 548 |
+
cudnnStatus_t CUDNNWINAPI
|
| 549 |
+
cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc);
|
| 550 |
+
|
| 551 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 552 |
+
cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType);
|
| 553 |
+
|
| 554 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 555 |
+
cudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 556 |
+
cudnnDataType_t compType,
|
| 557 |
+
cudnnLossNormalizationMode_t normMode,
|
| 558 |
+
cudnnNanPropagation_t gradMode);
|
| 559 |
+
|
| 560 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 561 |
+
cudnnSetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 562 |
+
cudnnDataType_t compType,
|
| 563 |
+
cudnnLossNormalizationMode_t normMode,
|
| 564 |
+
cudnnNanPropagation_t gradMode,
|
| 565 |
+
int maxLabelLength);
|
| 566 |
+
|
| 567 |
+
cudnnStatus_t CUDNNWINAPI
|
| 568 |
+
cudnnSetCTCLossDescriptor_v9(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 569 |
+
cudnnDataType_t compType,
|
| 570 |
+
cudnnLossNormalizationMode_t normMode,
|
| 571 |
+
cudnnCTCGradMode_t ctcGradMode,
|
| 572 |
+
int maxLabelLength);
|
| 573 |
+
|
| 574 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 575 |
+
cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType);
|
| 576 |
+
|
| 577 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 578 |
+
cudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 579 |
+
cudnnDataType_t *compType,
|
| 580 |
+
cudnnLossNormalizationMode_t *normMode,
|
| 581 |
+
cudnnNanPropagation_t *gradMode);
|
| 582 |
+
|
| 583 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 584 |
+
cudnnGetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 585 |
+
cudnnDataType_t *compType,
|
| 586 |
+
cudnnLossNormalizationMode_t *normMode,
|
| 587 |
+
cudnnNanPropagation_t *gradMode,
|
| 588 |
+
int *maxLabelLength);
|
| 589 |
+
|
| 590 |
+
cudnnStatus_t CUDNNWINAPI
|
| 591 |
+
cudnnGetCTCLossDescriptor_v9(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 592 |
+
cudnnDataType_t *compType,
|
| 593 |
+
cudnnLossNormalizationMode_t *normMode,
|
| 594 |
+
cudnnCTCGradMode_t *ctcGradMode,
|
| 595 |
+
int *maxLabelLength);
|
| 596 |
+
|
| 597 |
+
cudnnStatus_t CUDNNWINAPI
|
| 598 |
+
cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc);
|
| 599 |
+
|
| 600 |
+
/* return the ctc costs and gradients, given the probabilities and labels */
|
| 601 |
+
cudnnStatus_t CUDNNWINAPI
|
| 602 |
+
cudnnCTCLoss(
|
| 603 |
+
cudnnHandle_t handle,
|
| 604 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 605 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 606 |
+
const void *probs, /* probabilities after softmax, in GPU memory */
|
| 607 |
+
const int hostLabels[], /* labels, in CPU memory */
|
| 608 |
+
const int hostLabelLengths[], /* the length of each label, in CPU memory */
|
| 609 |
+
const int hostInputLengths[], /* the lengths of timing steps in each batch, in CPU memory */
|
| 610 |
+
void *costs, /* the returned costs of CTC, in GPU memory */
|
| 611 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
|
| 612 |
+
void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
|
| 613 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 614 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 615 |
+
void *workspace, /* pointer to the workspace, in GPU memory */
|
| 616 |
+
size_t workSpaceSizeInBytes); /* size of the workspace */
|
| 617 |
+
|
| 618 |
+
/* return the ctc costs and gradients, given the probabilities and labels */
|
| 619 |
+
cudnnStatus_t CUDNNWINAPI
|
| 620 |
+
cudnnCTCLoss_v8(
|
| 621 |
+
cudnnHandle_t handle,
|
| 622 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 623 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 624 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 625 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 626 |
+
const void *probs, /* probabilities after softmax, in GPU memory */
|
| 627 |
+
const int labels[], /* labels, in GPU memory */
|
| 628 |
+
const int labelLengths[], /* the length of each label, in GPU memory */
|
| 629 |
+
const int inputLengths[], /* the lengths of timing steps in each batch, in GPU memory */
|
| 630 |
+
void *costs, /* the returned costs of CTC, in GPU memory */
|
| 631 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
|
| 632 |
+
void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
|
| 633 |
+
size_t workSpaceSizeInBytes, /* size of the workspace */
|
| 634 |
+
void *workspace); /* pointer to the workspace, in GPU memory */
|
| 635 |
+
|
| 636 |
+
/* return the workspace size needed for ctc */
|
| 637 |
+
cudnnStatus_t CUDNNWINAPI
|
| 638 |
+
cudnnGetCTCLossWorkspaceSize(
|
| 639 |
+
cudnnHandle_t handle,
|
| 640 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 641 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 642 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
|
| 643 |
+
dimensions are T,N,A. To compute costs
|
| 644 |
+
only, set it to NULL */
|
| 645 |
+
const int *labels, /* labels, in CPU memory */
|
| 646 |
+
const int *labelLengths, /* the length of each label, in CPU memory */
|
| 647 |
+
const int *inputLengths, /* the lengths of timing steps in each batch, in CPU memory */
|
| 648 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 649 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 650 |
+
size_t *sizeInBytes); /* pointer to the returned workspace size */
|
| 651 |
+
|
| 652 |
+
/* return the workspace size needed for ctc */
|
| 653 |
+
cudnnStatus_t CUDNNWINAPI
|
| 654 |
+
cudnnGetCTCLossWorkspaceSize_v8(
|
| 655 |
+
cudnnHandle_t handle,
|
| 656 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 657 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 658 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 659 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 660 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
|
| 661 |
+
dimensions are T,N,A. To compute costs
|
| 662 |
+
only, set it to NULL */
|
| 663 |
+
size_t *sizeInBytes); /* pointer to the returned workspace size */
|
| 664 |
+
|
| 665 |
+
#if defined(__cplusplus)
|
| 666 |
+
}
|
| 667 |
+
#endif
|
| 668 |
+
|
| 669 |
+
#endif /* CUDNN_ADV_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_adv_v9.h
ADDED
|
@@ -0,0 +1,669 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/* cudnn_adv : cuDNN's advanced and experimental features.
|
| 51 |
+
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_ADV_H_)
|
| 55 |
+
#define CUDNN_ADV_H_
|
| 56 |
+
|
| 57 |
+
#include <stdint.h>
|
| 58 |
+
|
| 59 |
+
#include "cudnn_version.h"
|
| 60 |
+
#include "cudnn_ops.h"
|
| 61 |
+
|
| 62 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 63 |
+
#define CUDNN_ADV_MAJOR 9
|
| 64 |
+
#define CUDNN_ADV_MINOR 10
|
| 65 |
+
#define CUDNN_ADV_PATCH 2
|
| 66 |
+
|
| 67 |
+
#if (CUDNN_ADV_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_MINOR != CUDNN_MINOR) || (CUDNN_ADV_PATCH != CUDNN_PATCHLEVEL)
|
| 68 |
+
#error Version mismatch in cuDNN ADV INFER!!!
|
| 69 |
+
#endif
|
| 70 |
+
|
| 71 |
+
#if defined(__cplusplus)
|
| 72 |
+
extern "C" {
|
| 73 |
+
#endif
|
| 74 |
+
|
| 75 |
+
/* BASIC RNN API */
|
| 76 |
+
|
| 77 |
+
typedef enum {
|
| 78 |
+
CUDNN_RNN_ALGO_STANDARD = 0,
|
| 79 |
+
CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
|
| 80 |
+
CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
|
| 81 |
+
CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
|
| 82 |
+
CUDNN_RNN_ALGO_COUNT = 4,
|
| 83 |
+
} cudnnRNNAlgo_t;
|
| 84 |
+
|
| 85 |
+
typedef enum {
|
| 86 |
+
CUDNN_FWD_MODE_INFERENCE = 0,
|
| 87 |
+
CUDNN_FWD_MODE_TRAINING = 1,
|
| 88 |
+
} cudnnForwardMode_t;
|
| 89 |
+
|
| 90 |
+
typedef enum {
|
| 91 |
+
CUDNN_RNN_RELU = 0, /* basic RNN cell type with ReLu activation */
|
| 92 |
+
CUDNN_RNN_TANH = 1, /* basic RNN cell type with tanh activation */
|
| 93 |
+
CUDNN_LSTM = 2, /* LSTM with optional recurrent projection and clipping */
|
| 94 |
+
CUDNN_GRU = 3, /* Using h' = tanh(r * Uh(t-1) + Wx) and h = (1 - z) * h' + z * h(t-1); */
|
| 95 |
+
} cudnnRNNMode_t;
|
| 96 |
+
|
| 97 |
+
typedef enum {
|
| 98 |
+
CUDNN_RNN_NO_BIAS = 0, /* rnn cell formulas do not use biases */
|
| 99 |
+
CUDNN_RNN_SINGLE_INP_BIAS = 1, /* rnn cell formulas use one input bias in input GEMM */
|
| 100 |
+
CUDNN_RNN_DOUBLE_BIAS = 2, /* default, rnn cell formulas use two bias vectors */
|
| 101 |
+
CUDNN_RNN_SINGLE_REC_BIAS = 3 /* rnn cell formulas use one recurrent bias in recurrent GEMM */
|
| 102 |
+
} cudnnRNNBiasMode_t;
|
| 103 |
+
|
| 104 |
+
typedef enum {
|
| 105 |
+
CUDNN_UNIDIRECTIONAL = 0, /* single direction network */
|
| 106 |
+
CUDNN_BIDIRECTIONAL = 1, /* output concatination at each layer */
|
| 107 |
+
} cudnnDirectionMode_t;
|
| 108 |
+
|
| 109 |
+
typedef enum {
|
| 110 |
+
CUDNN_LINEAR_INPUT = 0, /* adjustable weight matrix in first layer input GEMM */
|
| 111 |
+
CUDNN_SKIP_INPUT = 1, /* fixed identity matrix in the first layer input GEMM */
|
| 112 |
+
} cudnnRNNInputMode_t;
|
| 113 |
+
|
| 114 |
+
typedef enum {
|
| 115 |
+
CUDNN_RNN_CLIP_NONE = 0, /* disables LSTM cell clipping */
|
| 116 |
+
CUDNN_RNN_CLIP_MINMAX = 1, /* enables LSTM cell clipping */
|
| 117 |
+
} cudnnRNNClipMode_t;
|
| 118 |
+
|
| 119 |
+
typedef enum {
|
| 120 |
+
CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED = 0, /* padded, outer stride from one time-step to the next */
|
| 121 |
+
CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED = 1, /* sequence length sorted and packed as in basic RNN api */
|
| 122 |
+
CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2, /* padded, outer stride from one batch to the next */
|
| 123 |
+
} cudnnRNNDataLayout_t;
|
| 124 |
+
|
| 125 |
+
/* For auxFlags in cudnnSetRNNDescriptor_v8() */
|
| 126 |
+
#define CUDNN_RNN_PADDED_IO_DISABLED 0
|
| 127 |
+
#define CUDNN_RNN_PADDED_IO_ENABLED (1U << 0)
|
| 128 |
+
|
| 129 |
+
struct cudnnRNNStruct;
|
| 130 |
+
typedef struct cudnnRNNStruct *cudnnRNNDescriptor_t;
|
| 131 |
+
|
| 132 |
+
struct cudnnRNNDataStruct;
|
| 133 |
+
typedef struct cudnnRNNDataStruct *cudnnRNNDataDescriptor_t;
|
| 134 |
+
|
| 135 |
+
cudnnStatus_t CUDNNWINAPI
|
| 136 |
+
cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc);
|
| 137 |
+
|
| 138 |
+
cudnnStatus_t CUDNNWINAPI
|
| 139 |
+
cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc);
|
| 140 |
+
|
| 141 |
+
/*
|
| 142 |
+
* mathPrec in cudnnSetRNNDescriptor_v8() specifies compute precision.
|
| 143 |
+
* Compute precision is further modified by mathType that sets the
|
| 144 |
+
* preferred option for using NVIDIA Tensor Cores. dataType specify
|
| 145 |
+
* input/output data type and weight/bias type.
|
| 146 |
+
*/
|
| 147 |
+
|
| 148 |
+
cudnnStatus_t CUDNNWINAPI
|
| 149 |
+
cudnnSetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 150 |
+
cudnnRNNAlgo_t algo,
|
| 151 |
+
cudnnRNNMode_t cellMode,
|
| 152 |
+
cudnnRNNBiasMode_t biasMode,
|
| 153 |
+
cudnnDirectionMode_t dirMode,
|
| 154 |
+
cudnnRNNInputMode_t inputMode,
|
| 155 |
+
cudnnDataType_t dataType,
|
| 156 |
+
cudnnDataType_t mathPrec,
|
| 157 |
+
cudnnMathType_t mathType,
|
| 158 |
+
int32_t inputSize,
|
| 159 |
+
int32_t hiddenSize,
|
| 160 |
+
int32_t projSize,
|
| 161 |
+
int32_t numLayers,
|
| 162 |
+
cudnnDropoutDescriptor_t dropoutDesc,
|
| 163 |
+
uint32_t auxFlags);
|
| 164 |
+
|
| 165 |
+
cudnnStatus_t CUDNNWINAPI
|
| 166 |
+
cudnnGetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 167 |
+
cudnnRNNAlgo_t *algo,
|
| 168 |
+
cudnnRNNMode_t *cellMode,
|
| 169 |
+
cudnnRNNBiasMode_t *biasMode,
|
| 170 |
+
cudnnDirectionMode_t *dirMode,
|
| 171 |
+
cudnnRNNInputMode_t *inputMode,
|
| 172 |
+
cudnnDataType_t *dataType,
|
| 173 |
+
cudnnDataType_t *mathPrec,
|
| 174 |
+
cudnnMathType_t *mathType,
|
| 175 |
+
int32_t *inputSize,
|
| 176 |
+
int32_t *hiddenSize,
|
| 177 |
+
int32_t *projSize,
|
| 178 |
+
int32_t *numLayers,
|
| 179 |
+
cudnnDropoutDescriptor_t *dropoutDesc,
|
| 180 |
+
uint32_t *auxFlags);
|
| 181 |
+
|
| 182 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 183 |
+
cudnnRNNSetClip_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 184 |
+
cudnnRNNClipMode_t clipMode,
|
| 185 |
+
cudnnNanPropagation_t clipNanOpt,
|
| 186 |
+
double lclip,
|
| 187 |
+
double rclip);
|
| 188 |
+
|
| 189 |
+
cudnnStatus_t CUDNNWINAPI
|
| 190 |
+
cudnnRNNSetClip_v9(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, double lclip, double rclip);
|
| 191 |
+
|
| 192 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 193 |
+
cudnnRNNGetClip_v8(cudnnRNNDescriptor_t rnnDesc,
|
| 194 |
+
cudnnRNNClipMode_t *clipMode,
|
| 195 |
+
cudnnNanPropagation_t *clipNanOpt,
|
| 196 |
+
double *lclip,
|
| 197 |
+
double *rclip);
|
| 198 |
+
|
| 199 |
+
cudnnStatus_t CUDNNWINAPI
|
| 200 |
+
cudnnRNNGetClip_v9(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t *clipMode, double *lclip, double *rclip);
|
| 201 |
+
|
| 202 |
+
cudnnStatus_t CUDNNWINAPI
|
| 203 |
+
cudnnBuildRNNDynamic(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int miniBatch);
|
| 204 |
+
|
| 205 |
+
cudnnStatus_t CUDNNWINAPI
|
| 206 |
+
cudnnGetRNNTempSpaceSizes(cudnnHandle_t handle,
|
| 207 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 208 |
+
cudnnForwardMode_t fwdMode,
|
| 209 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 210 |
+
size_t *workSpaceSize,
|
| 211 |
+
size_t *reserveSpaceSize);
|
| 212 |
+
|
| 213 |
+
cudnnStatus_t CUDNNWINAPI
|
| 214 |
+
cudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, size_t *weightSpaceSize);
|
| 215 |
+
|
| 216 |
+
cudnnStatus_t CUDNNWINAPI
|
| 217 |
+
cudnnGetRNNWeightParams(cudnnHandle_t handle,
|
| 218 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 219 |
+
int32_t pseudoLayer,
|
| 220 |
+
size_t weightSpaceSize,
|
| 221 |
+
const void *weightSpace,
|
| 222 |
+
int32_t linLayerID,
|
| 223 |
+
cudnnTensorDescriptor_t mDesc,
|
| 224 |
+
void **mAddr,
|
| 225 |
+
cudnnTensorDescriptor_t bDesc,
|
| 226 |
+
void **bAddr);
|
| 227 |
+
|
| 228 |
+
cudnnStatus_t CUDNNWINAPI
|
| 229 |
+
cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc);
|
| 230 |
+
|
| 231 |
+
cudnnStatus_t CUDNNWINAPI
|
| 232 |
+
cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc);
|
| 233 |
+
|
| 234 |
+
cudnnStatus_t CUDNNWINAPI
|
| 235 |
+
cudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
|
| 236 |
+
cudnnDataType_t dataType,
|
| 237 |
+
cudnnRNNDataLayout_t layout,
|
| 238 |
+
int maxSeqLength,
|
| 239 |
+
int batchSize,
|
| 240 |
+
int vectorSize,
|
| 241 |
+
const int seqLengthArray[], /* length of each sequence in the batch */
|
| 242 |
+
void *paddingFill); /* symbol for filling padding position in output */
|
| 243 |
+
|
| 244 |
+
cudnnStatus_t CUDNNWINAPI
|
| 245 |
+
cudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
|
| 246 |
+
cudnnDataType_t *dataType,
|
| 247 |
+
cudnnRNNDataLayout_t *layout,
|
| 248 |
+
int *maxSeqLength,
|
| 249 |
+
int *batchSize,
|
| 250 |
+
int *vectorSize,
|
| 251 |
+
int arrayLengthRequested,
|
| 252 |
+
int seqLengthArray[],
|
| 253 |
+
void *paddingFill);
|
| 254 |
+
|
| 255 |
+
cudnnStatus_t CUDNNWINAPI
|
| 256 |
+
cudnnRNNForward(cudnnHandle_t handle,
|
| 257 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 258 |
+
cudnnForwardMode_t fwdMode,
|
| 259 |
+
const int32_t devSeqLengths[],
|
| 260 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 261 |
+
const void *x,
|
| 262 |
+
cudnnRNNDataDescriptor_t yDesc,
|
| 263 |
+
void *y,
|
| 264 |
+
cudnnTensorDescriptor_t hDesc,
|
| 265 |
+
const void *hx,
|
| 266 |
+
void *hy,
|
| 267 |
+
cudnnTensorDescriptor_t cDesc,
|
| 268 |
+
const void *cx,
|
| 269 |
+
void *cy,
|
| 270 |
+
size_t weightSpaceSize,
|
| 271 |
+
const void *weightSpace,
|
| 272 |
+
size_t workSpaceSize,
|
| 273 |
+
void *workSpace,
|
| 274 |
+
size_t reserveSpaceSize,
|
| 275 |
+
void *reserveSpace);
|
| 276 |
+
|
| 277 |
+
/* Sequence data descriptor */
|
| 278 |
+
|
| 279 |
+
typedef enum {
|
| 280 |
+
CUDNN_SEQDATA_TIME_DIM = 0, /* index in time */
|
| 281 |
+
CUDNN_SEQDATA_BATCH_DIM = 1, /* index in batch */
|
| 282 |
+
CUDNN_SEQDATA_BEAM_DIM = 2, /* index in beam */
|
| 283 |
+
CUDNN_SEQDATA_VECT_DIM = 3 /* index in vector */
|
| 284 |
+
} cudnnSeqDataAxis_t;
|
| 285 |
+
|
| 286 |
+
struct cudnnSeqDataStruct;
|
| 287 |
+
typedef struct cudnnSeqDataStruct *cudnnSeqDataDescriptor_t CUDNN_DEPRECATED;
|
| 288 |
+
|
| 289 |
+
#define CUDNN_SEQDATA_DIM_COUNT 4 /* dimension count */
|
| 290 |
+
|
| 291 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 292 |
+
cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc);
|
| 293 |
+
|
| 294 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 295 |
+
cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc);
|
| 296 |
+
|
| 297 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 298 |
+
cudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc,
|
| 299 |
+
cudnnDataType_t dataType,
|
| 300 |
+
int nbDims,
|
| 301 |
+
const int dimA[],
|
| 302 |
+
const cudnnSeqDataAxis_t axes[],
|
| 303 |
+
size_t seqLengthArraySize,
|
| 304 |
+
const int seqLengthArray[],
|
| 305 |
+
void *paddingFill);
|
| 306 |
+
|
| 307 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 308 |
+
cudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc,
|
| 309 |
+
cudnnDataType_t *dataType,
|
| 310 |
+
int *nbDims,
|
| 311 |
+
int nbDimsRequested,
|
| 312 |
+
int dimA[],
|
| 313 |
+
cudnnSeqDataAxis_t axes[],
|
| 314 |
+
size_t *seqLengthArraySize,
|
| 315 |
+
size_t seqLengthSizeRequested,
|
| 316 |
+
int seqLengthArray[],
|
| 317 |
+
void *paddingFill);
|
| 318 |
+
|
| 319 |
+
/* Multihead Attention */
|
| 320 |
+
|
| 321 |
+
/*
|
| 322 |
+
* Multi-head attention options passed via 'attnMode' in cudnnSetAttnDescriptor().
|
| 323 |
+
* Use the bitwise OR operator to combine several settings listed below. Additional
|
| 324 |
+
* minor options can be added here w/o changing or introducing new API functions.
|
| 325 |
+
*/
|
| 326 |
+
#define CUDNN_ATTN_QUERYMAP_ALL_TO_ONE 0 /* multiple Q-s map to a single (K,V) set when beam size > 1 */
|
| 327 |
+
#define CUDNN_ATTN_QUERYMAP_ONE_TO_ONE (1U << 0) /* multiple Q-s map to multiple (K,V) sets when beam size > 1 */
|
| 328 |
+
#define CUDNN_ATTN_DISABLE_PROJ_BIASES 0 /* no biases in attention input and output projections */
|
| 329 |
+
#define CUDNN_ATTN_ENABLE_PROJ_BIASES (1U << 1) /* use biases in attention input and output projections */
|
| 330 |
+
|
| 331 |
+
struct cudnnAttnStruct;
|
| 332 |
+
typedef struct cudnnAttnStruct *cudnnAttnDescriptor_t CUDNN_DEPRECATED;
|
| 333 |
+
|
| 334 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc);
|
| 336 |
+
|
| 337 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 338 |
+
cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc);
|
| 339 |
+
|
| 340 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 341 |
+
cudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
|
| 342 |
+
unsigned attnMode,
|
| 343 |
+
int nHeads,
|
| 344 |
+
double smScaler,
|
| 345 |
+
cudnnDataType_t dataType,
|
| 346 |
+
cudnnDataType_t computePrec,
|
| 347 |
+
cudnnMathType_t mathType,
|
| 348 |
+
cudnnDropoutDescriptor_t attnDropoutDesc,
|
| 349 |
+
cudnnDropoutDescriptor_t postDropoutDesc,
|
| 350 |
+
int qSize,
|
| 351 |
+
int kSize,
|
| 352 |
+
int vSize,
|
| 353 |
+
int qProjSize,
|
| 354 |
+
int kProjSize,
|
| 355 |
+
int vProjSize,
|
| 356 |
+
int oProjSize,
|
| 357 |
+
int qoMaxSeqLength,
|
| 358 |
+
int kvMaxSeqLength,
|
| 359 |
+
int maxBatchSize,
|
| 360 |
+
int maxBeamSize);
|
| 361 |
+
|
| 362 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 363 |
+
cudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
|
| 364 |
+
unsigned *attnMode,
|
| 365 |
+
int *nHeads,
|
| 366 |
+
double *smScaler,
|
| 367 |
+
cudnnDataType_t *dataType,
|
| 368 |
+
cudnnDataType_t *computePrec,
|
| 369 |
+
cudnnMathType_t *mathType,
|
| 370 |
+
cudnnDropoutDescriptor_t *attnDropoutDesc,
|
| 371 |
+
cudnnDropoutDescriptor_t *postDropoutDesc,
|
| 372 |
+
int *qSize,
|
| 373 |
+
int *kSize,
|
| 374 |
+
int *vSize,
|
| 375 |
+
int *qProjSize,
|
| 376 |
+
int *kProjSize,
|
| 377 |
+
int *vProjSize,
|
| 378 |
+
int *oProjSize,
|
| 379 |
+
int *qoMaxSeqLength,
|
| 380 |
+
int *kvMaxSeqLength,
|
| 381 |
+
int *maxBatchSize,
|
| 382 |
+
int *maxBeamSize);
|
| 383 |
+
|
| 384 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 385 |
+
cudnnGetMultiHeadAttnBuffers(cudnnHandle_t handle,
|
| 386 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 387 |
+
size_t *weightSizeInBytes,
|
| 388 |
+
size_t *workSpaceSizeInBytes,
|
| 389 |
+
size_t *reserveSpaceSizeInBytes);
|
| 390 |
+
|
| 391 |
+
typedef enum {
|
| 392 |
+
CUDNN_MH_ATTN_Q_WEIGHTS = 0, /* input projection weights for 'queries' */
|
| 393 |
+
CUDNN_MH_ATTN_K_WEIGHTS = 1, /* input projection weights for 'keys' */
|
| 394 |
+
CUDNN_MH_ATTN_V_WEIGHTS = 2, /* input projection weights for 'values' */
|
| 395 |
+
CUDNN_MH_ATTN_O_WEIGHTS = 3, /* output projection weights */
|
| 396 |
+
CUDNN_MH_ATTN_Q_BIASES = 4, /* input projection bias tensor for 'queries' */
|
| 397 |
+
CUDNN_MH_ATTN_K_BIASES = 5, /* input projection bias for 'keys' */
|
| 398 |
+
CUDNN_MH_ATTN_V_BIASES = 6, /* input projection bias for 'values' */
|
| 399 |
+
CUDNN_MH_ATTN_O_BIASES = 7, /* output projection biases */
|
| 400 |
+
} cudnnMultiHeadAttnWeightKind_t;
|
| 401 |
+
|
| 402 |
+
#define CUDNN_ATTN_WKIND_COUNT 8 /* Number of attention weight/bias tensors */
|
| 403 |
+
|
| 404 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 405 |
+
cudnnGetMultiHeadAttnWeights(cudnnHandle_t handle,
|
| 406 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 407 |
+
cudnnMultiHeadAttnWeightKind_t wKind,
|
| 408 |
+
size_t weightSizeInBytes,
|
| 409 |
+
const void *weights,
|
| 410 |
+
cudnnTensorDescriptor_t wDesc,
|
| 411 |
+
void **wAddr);
|
| 412 |
+
|
| 413 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 414 |
+
cudnnMultiHeadAttnForward(cudnnHandle_t handle,
|
| 415 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 416 |
+
int currIdx,
|
| 417 |
+
const int loWinIdx[],
|
| 418 |
+
const int hiWinIdx[],
|
| 419 |
+
const int devSeqLengthsQO[],
|
| 420 |
+
const int devSeqLengthsKV[],
|
| 421 |
+
const cudnnSeqDataDescriptor_t qDesc,
|
| 422 |
+
const void *queries,
|
| 423 |
+
const void *residuals,
|
| 424 |
+
const cudnnSeqDataDescriptor_t kDesc,
|
| 425 |
+
const void *keys,
|
| 426 |
+
const cudnnSeqDataDescriptor_t vDesc,
|
| 427 |
+
const void *values,
|
| 428 |
+
const cudnnSeqDataDescriptor_t oDesc,
|
| 429 |
+
void *out,
|
| 430 |
+
size_t weightSizeInBytes,
|
| 431 |
+
const void *weights,
|
| 432 |
+
size_t workSpaceSizeInBytes,
|
| 433 |
+
void *workSpace,
|
| 434 |
+
size_t reserveSpaceSizeInBytes,
|
| 435 |
+
void *reserveSpace);
|
| 436 |
+
|
| 437 |
+
/*
|
| 438 |
+
* \brief Cross-library version checker.
|
| 439 |
+
* This function is implemented differently in each sub-library. Each sublib
|
| 440 |
+
* checks whether its own version matches that of its dependencies.
|
| 441 |
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
| 442 |
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
| 443 |
+
*/
|
| 444 |
+
cudnnStatus_t CUDNNWINAPI
|
| 445 |
+
cudnnAdvVersionCheck(void);
|
| 446 |
+
|
| 447 |
+
typedef enum {
|
| 448 |
+
CUDNN_WGRAD_MODE_ADD = 0, /* add partial gradients to wgrad output buffers */
|
| 449 |
+
CUDNN_WGRAD_MODE_SET = 1, /* write partial gradients to wgrad output buffers */
|
| 450 |
+
} cudnnWgradMode_t;
|
| 451 |
+
|
| 452 |
+
cudnnStatus_t CUDNNWINAPI
|
| 453 |
+
cudnnRNNBackwardData_v8(cudnnHandle_t handle,
|
| 454 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 455 |
+
const int32_t devSeqLengths[],
|
| 456 |
+
cudnnRNNDataDescriptor_t yDesc,
|
| 457 |
+
const void *y,
|
| 458 |
+
const void *dy,
|
| 459 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 460 |
+
void *dx,
|
| 461 |
+
cudnnTensorDescriptor_t hDesc,
|
| 462 |
+
const void *hx,
|
| 463 |
+
const void *dhy,
|
| 464 |
+
void *dhx,
|
| 465 |
+
cudnnTensorDescriptor_t cDesc,
|
| 466 |
+
const void *cx,
|
| 467 |
+
const void *dcy,
|
| 468 |
+
void *dcx,
|
| 469 |
+
size_t weightSpaceSize,
|
| 470 |
+
const void *weightSpace,
|
| 471 |
+
size_t workSpaceSize,
|
| 472 |
+
void *workSpace,
|
| 473 |
+
size_t reserveSpaceSize,
|
| 474 |
+
void *reserveSpace);
|
| 475 |
+
|
| 476 |
+
cudnnStatus_t CUDNNWINAPI
|
| 477 |
+
cudnnRNNBackwardWeights_v8(cudnnHandle_t handle,
|
| 478 |
+
cudnnRNNDescriptor_t rnnDesc,
|
| 479 |
+
cudnnWgradMode_t addGrad,
|
| 480 |
+
const int32_t devSeqLengths[],
|
| 481 |
+
cudnnRNNDataDescriptor_t xDesc,
|
| 482 |
+
const void *x,
|
| 483 |
+
cudnnTensorDescriptor_t hDesc,
|
| 484 |
+
const void *hx,
|
| 485 |
+
cudnnRNNDataDescriptor_t yDesc,
|
| 486 |
+
const void *y,
|
| 487 |
+
size_t weightSpaceSize,
|
| 488 |
+
void *dweightSpace,
|
| 489 |
+
size_t workSpaceSize,
|
| 490 |
+
void *workSpace,
|
| 491 |
+
size_t reserveSpaceSize,
|
| 492 |
+
void *reserveSpace);
|
| 493 |
+
|
| 494 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 495 |
+
cudnnMultiHeadAttnBackwardData(cudnnHandle_t handle,
|
| 496 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 497 |
+
const int loWinIdx[],
|
| 498 |
+
const int hiWinIdx[],
|
| 499 |
+
const int devSeqLengthsDQDO[],
|
| 500 |
+
const int devSeqLengthsDKDV[],
|
| 501 |
+
const cudnnSeqDataDescriptor_t doDesc,
|
| 502 |
+
const void *dout,
|
| 503 |
+
const cudnnSeqDataDescriptor_t dqDesc,
|
| 504 |
+
void *dqueries,
|
| 505 |
+
const void *queries,
|
| 506 |
+
const cudnnSeqDataDescriptor_t dkDesc,
|
| 507 |
+
void *dkeys,
|
| 508 |
+
const void *keys,
|
| 509 |
+
const cudnnSeqDataDescriptor_t dvDesc,
|
| 510 |
+
void *dvalues,
|
| 511 |
+
const void *values,
|
| 512 |
+
size_t weightSizeInBytes,
|
| 513 |
+
const void *weights,
|
| 514 |
+
size_t workSpaceSizeInBytes,
|
| 515 |
+
void *workSpace,
|
| 516 |
+
size_t reserveSpaceSizeInBytes,
|
| 517 |
+
void *reserveSpace);
|
| 518 |
+
|
| 519 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 520 |
+
cudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle,
|
| 521 |
+
const cudnnAttnDescriptor_t attnDesc,
|
| 522 |
+
cudnnWgradMode_t addGrad,
|
| 523 |
+
const cudnnSeqDataDescriptor_t qDesc,
|
| 524 |
+
const void *queries,
|
| 525 |
+
const cudnnSeqDataDescriptor_t kDesc,
|
| 526 |
+
const void *keys,
|
| 527 |
+
const cudnnSeqDataDescriptor_t vDesc,
|
| 528 |
+
const void *values,
|
| 529 |
+
const cudnnSeqDataDescriptor_t doDesc,
|
| 530 |
+
const void *dout,
|
| 531 |
+
size_t weightSizeInBytes,
|
| 532 |
+
const void *weights,
|
| 533 |
+
void *dweights,
|
| 534 |
+
size_t workSpaceSizeInBytes,
|
| 535 |
+
void *workSpace,
|
| 536 |
+
size_t reserveSpaceSizeInBytes,
|
| 537 |
+
void *reserveSpace);
|
| 538 |
+
|
| 539 |
+
/*
|
| 540 |
+
* CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions
|
| 541 |
+
*/
|
| 542 |
+
/* Input normalization mode for loss function */
|
| 543 |
+
typedef enum {
|
| 544 |
+
CUDNN_LOSS_NORMALIZATION_NONE = 0,
|
| 545 |
+
CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1,
|
| 546 |
+
} cudnnLossNormalizationMode_t;
|
| 547 |
+
|
| 548 |
+
cudnnStatus_t CUDNNWINAPI
|
| 549 |
+
cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc);
|
| 550 |
+
|
| 551 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 552 |
+
cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType);
|
| 553 |
+
|
| 554 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 555 |
+
cudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 556 |
+
cudnnDataType_t compType,
|
| 557 |
+
cudnnLossNormalizationMode_t normMode,
|
| 558 |
+
cudnnNanPropagation_t gradMode);
|
| 559 |
+
|
| 560 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 561 |
+
cudnnSetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 562 |
+
cudnnDataType_t compType,
|
| 563 |
+
cudnnLossNormalizationMode_t normMode,
|
| 564 |
+
cudnnNanPropagation_t gradMode,
|
| 565 |
+
int maxLabelLength);
|
| 566 |
+
|
| 567 |
+
cudnnStatus_t CUDNNWINAPI
|
| 568 |
+
cudnnSetCTCLossDescriptor_v9(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 569 |
+
cudnnDataType_t compType,
|
| 570 |
+
cudnnLossNormalizationMode_t normMode,
|
| 571 |
+
cudnnCTCGradMode_t ctcGradMode,
|
| 572 |
+
int maxLabelLength);
|
| 573 |
+
|
| 574 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 575 |
+
cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType);
|
| 576 |
+
|
| 577 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 578 |
+
cudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 579 |
+
cudnnDataType_t *compType,
|
| 580 |
+
cudnnLossNormalizationMode_t *normMode,
|
| 581 |
+
cudnnNanPropagation_t *gradMode);
|
| 582 |
+
|
| 583 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 584 |
+
cudnnGetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 585 |
+
cudnnDataType_t *compType,
|
| 586 |
+
cudnnLossNormalizationMode_t *normMode,
|
| 587 |
+
cudnnNanPropagation_t *gradMode,
|
| 588 |
+
int *maxLabelLength);
|
| 589 |
+
|
| 590 |
+
cudnnStatus_t CUDNNWINAPI
|
| 591 |
+
cudnnGetCTCLossDescriptor_v9(cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 592 |
+
cudnnDataType_t *compType,
|
| 593 |
+
cudnnLossNormalizationMode_t *normMode,
|
| 594 |
+
cudnnCTCGradMode_t *ctcGradMode,
|
| 595 |
+
int *maxLabelLength);
|
| 596 |
+
|
| 597 |
+
cudnnStatus_t CUDNNWINAPI
|
| 598 |
+
cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc);
|
| 599 |
+
|
| 600 |
+
/* return the ctc costs and gradients, given the probabilities and labels */
|
| 601 |
+
cudnnStatus_t CUDNNWINAPI
|
| 602 |
+
cudnnCTCLoss(
|
| 603 |
+
cudnnHandle_t handle,
|
| 604 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 605 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 606 |
+
const void *probs, /* probabilities after softmax, in GPU memory */
|
| 607 |
+
const int hostLabels[], /* labels, in CPU memory */
|
| 608 |
+
const int hostLabelLengths[], /* the length of each label, in CPU memory */
|
| 609 |
+
const int hostInputLengths[], /* the lengths of timing steps in each batch, in CPU memory */
|
| 610 |
+
void *costs, /* the returned costs of CTC, in GPU memory */
|
| 611 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
|
| 612 |
+
void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
|
| 613 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 614 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 615 |
+
void *workspace, /* pointer to the workspace, in GPU memory */
|
| 616 |
+
size_t workSpaceSizeInBytes); /* size of the workspace */
|
| 617 |
+
|
| 618 |
+
/* return the ctc costs and gradients, given the probabilities and labels */
|
| 619 |
+
cudnnStatus_t CUDNNWINAPI
|
| 620 |
+
cudnnCTCLoss_v8(
|
| 621 |
+
cudnnHandle_t handle,
|
| 622 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 623 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 624 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 625 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 626 |
+
const void *probs, /* probabilities after softmax, in GPU memory */
|
| 627 |
+
const int labels[], /* labels, in GPU memory */
|
| 628 |
+
const int labelLengths[], /* the length of each label, in GPU memory */
|
| 629 |
+
const int inputLengths[], /* the lengths of timing steps in each batch, in GPU memory */
|
| 630 |
+
void *costs, /* the returned costs of CTC, in GPU memory */
|
| 631 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
|
| 632 |
+
void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
|
| 633 |
+
size_t workSpaceSizeInBytes, /* size of the workspace */
|
| 634 |
+
void *workspace); /* pointer to the workspace, in GPU memory */
|
| 635 |
+
|
| 636 |
+
/* return the workspace size needed for ctc */
|
| 637 |
+
cudnnStatus_t CUDNNWINAPI
|
| 638 |
+
cudnnGetCTCLossWorkspaceSize(
|
| 639 |
+
cudnnHandle_t handle,
|
| 640 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 641 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 642 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
|
| 643 |
+
dimensions are T,N,A. To compute costs
|
| 644 |
+
only, set it to NULL */
|
| 645 |
+
const int *labels, /* labels, in CPU memory */
|
| 646 |
+
const int *labelLengths, /* the length of each label, in CPU memory */
|
| 647 |
+
const int *inputLengths, /* the lengths of timing steps in each batch, in CPU memory */
|
| 648 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 649 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 650 |
+
size_t *sizeInBytes); /* pointer to the returned workspace size */
|
| 651 |
+
|
| 652 |
+
/* return the workspace size needed for ctc */
|
| 653 |
+
cudnnStatus_t CUDNNWINAPI
|
| 654 |
+
cudnnGetCTCLossWorkspaceSize_v8(
|
| 655 |
+
cudnnHandle_t handle,
|
| 656 |
+
cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
|
| 657 |
+
cudnnCTCLossDescriptor_t ctcLossDesc,
|
| 658 |
+
const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
|
| 659 |
+
timing steps, N is the mini batch size, A is the alphabet size) */
|
| 660 |
+
const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
|
| 661 |
+
dimensions are T,N,A. To compute costs
|
| 662 |
+
only, set it to NULL */
|
| 663 |
+
size_t *sizeInBytes); /* pointer to the returned workspace size */
|
| 664 |
+
|
| 665 |
+
#if defined(__cplusplus)
|
| 666 |
+
}
|
| 667 |
+
#endif
|
| 668 |
+
|
| 669 |
+
#endif /* CUDNN_ADV_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_backend.h
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#ifndef _CUDNN_BACKEND_H_
|
| 51 |
+
#define _CUDNN_BACKEND_H_
|
| 52 |
+
|
| 53 |
+
/*
|
| 54 |
+
* The content of this header has been moved into cudnn_graph.h.
|
| 55 |
+
* This header is kept for the backward compatibility purpose.
|
| 56 |
+
*/
|
| 57 |
+
|
| 58 |
+
#include "cudnn_graph.h"
|
| 59 |
+
|
| 60 |
+
#endif /* _CUDNN_BACKEND_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_backend_v9.h
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#ifndef _CUDNN_BACKEND_H_
|
| 51 |
+
#define _CUDNN_BACKEND_H_
|
| 52 |
+
|
| 53 |
+
/*
|
| 54 |
+
* The content of this header has been moved into cudnn_graph.h.
|
| 55 |
+
* This header is kept for the backward compatibility purpose.
|
| 56 |
+
*/
|
| 57 |
+
|
| 58 |
+
#include "cudnn_graph.h"
|
| 59 |
+
|
| 60 |
+
#endif /* _CUDNN_BACKEND_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_cnn.h
ADDED
|
@@ -0,0 +1,693 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_cnn : cuDNN's basic definitions and CNN functions.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_CNN_H_)
|
| 55 |
+
#define CUDNN_CNN_H_
|
| 56 |
+
|
| 57 |
+
#pragma once
|
| 58 |
+
#include <stdint.h>
|
| 59 |
+
|
| 60 |
+
#include "cudnn_version.h"
|
| 61 |
+
#include "cudnn_ops.h"
|
| 62 |
+
|
| 63 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 64 |
+
#define CUDNN_CNN_MAJOR 9
|
| 65 |
+
#define CUDNN_CNN_MINOR 10
|
| 66 |
+
#define CUDNN_CNN_PATCH 2
|
| 67 |
+
|
| 68 |
+
#if (CUDNN_CNN_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_MINOR != CUDNN_MINOR) || (CUDNN_CNN_PATCH != CUDNN_PATCHLEVEL)
|
| 69 |
+
#error Version mismatch in cuDNN CNN INFER!!!
|
| 70 |
+
#endif
|
| 71 |
+
|
| 72 |
+
#if defined(__cplusplus)
|
| 73 |
+
extern "C" {
|
| 74 |
+
#endif
|
| 75 |
+
|
| 76 |
+
typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t CUDNN_DEPRECATED;
|
| 77 |
+
|
| 78 |
+
typedef struct cudnnConvolutionFwdAlgoPerfStruct {
|
| 79 |
+
cudnnConvolutionFwdAlgo_t algo;
|
| 80 |
+
cudnnStatus_t status;
|
| 81 |
+
float time;
|
| 82 |
+
size_t memory;
|
| 83 |
+
cudnnDeterminism_t determinism;
|
| 84 |
+
cudnnMathType_t mathType;
|
| 85 |
+
int reserved[3];
|
| 86 |
+
} cudnnConvolutionFwdAlgoPerf_t CUDNN_DEPRECATED;
|
| 87 |
+
|
| 88 |
+
/* Create an instance of convolution descriptor */
|
| 89 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 90 |
+
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
|
| 91 |
+
|
| 92 |
+
/* Destroy an instance of convolution descriptor */
|
| 93 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 94 |
+
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
|
| 95 |
+
|
| 96 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 97 |
+
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
|
| 98 |
+
|
| 99 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 100 |
+
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
|
| 101 |
+
|
| 102 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 103 |
+
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
|
| 104 |
+
|
| 105 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 106 |
+
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
|
| 107 |
+
|
| 108 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 109 |
+
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
|
| 110 |
+
|
| 111 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 112 |
+
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
|
| 113 |
+
|
| 114 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 115 |
+
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
|
| 116 |
+
int pad_h, /* zero-padding height */
|
| 117 |
+
int pad_w, /* zero-padding width */
|
| 118 |
+
int u, /* vertical filter stride */
|
| 119 |
+
int v, /* horizontal filter stride */
|
| 120 |
+
int dilation_h, /* filter dilation in the vertical dimension */
|
| 121 |
+
int dilation_w, /* filter dilation in the horizontal dimension */
|
| 122 |
+
cudnnConvolutionMode_t mode,
|
| 123 |
+
cudnnDataType_t computeType);
|
| 124 |
+
|
| 125 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 126 |
+
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
|
| 127 |
+
int *pad_h, /* zero-padding height */
|
| 128 |
+
int *pad_w, /* zero-padding width */
|
| 129 |
+
int *u, /* vertical filter stride */
|
| 130 |
+
int *v, /* horizontal filter stride */
|
| 131 |
+
int *dilation_h, /* filter dilation in the vertical dimension */
|
| 132 |
+
int *dilation_w, /* filter dilation in the horizontal dimension */
|
| 133 |
+
cudnnConvolutionMode_t *mode,
|
| 134 |
+
cudnnDataType_t *computeType);
|
| 135 |
+
|
| 136 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 137 |
+
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
|
| 138 |
+
int arrayLength, /* nbDims-2 size */
|
| 139 |
+
const int padA[],
|
| 140 |
+
const int filterStrideA[],
|
| 141 |
+
const int dilationA[],
|
| 142 |
+
cudnnConvolutionMode_t mode,
|
| 143 |
+
cudnnDataType_t computeType); /* convolution data type */
|
| 144 |
+
|
| 145 |
+
/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
|
| 146 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 147 |
+
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
|
| 148 |
+
int arrayLengthRequested,
|
| 149 |
+
int *arrayLength,
|
| 150 |
+
int padA[],
|
| 151 |
+
int strideA[],
|
| 152 |
+
int dilationA[],
|
| 153 |
+
cudnnConvolutionMode_t *mode,
|
| 154 |
+
cudnnDataType_t *computeType); /* convolution data type */
|
| 155 |
+
|
| 156 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 157 |
+
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
|
| 158 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 159 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 160 |
+
int *n,
|
| 161 |
+
int *c,
|
| 162 |
+
int *h,
|
| 163 |
+
int *w);
|
| 164 |
+
|
| 165 |
+
/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
|
| 166 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 167 |
+
cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
|
| 168 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 169 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 170 |
+
int nbDims,
|
| 171 |
+
int tensorOuputDimA[]);
|
| 172 |
+
|
| 173 |
+
/* helper function to provide the convolution forward algo that fit best the requirement */
|
| 174 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 175 |
+
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 176 |
+
|
| 177 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 178 |
+
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
|
| 179 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 180 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 181 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 182 |
+
const cudnnTensorDescriptor_t destDesc,
|
| 183 |
+
const int requestedAlgoCount,
|
| 184 |
+
int *returnedAlgoCount,
|
| 185 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults);
|
| 186 |
+
|
| 187 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 188 |
+
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
|
| 189 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 190 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 191 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 192 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 193 |
+
const int requestedAlgoCount,
|
| 194 |
+
int *returnedAlgoCount,
|
| 195 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults);
|
| 196 |
+
|
| 197 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 198 |
+
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
|
| 199 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 200 |
+
const void *x,
|
| 201 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 202 |
+
const void *w,
|
| 203 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 204 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 205 |
+
void *y,
|
| 206 |
+
const int requestedAlgoCount,
|
| 207 |
+
int *returnedAlgoCount,
|
| 208 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults,
|
| 209 |
+
void *workSpace,
|
| 210 |
+
size_t workSpaceSizeInBytes);
|
| 211 |
+
|
| 212 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 213 |
+
cudnnIm2Col(cudnnHandle_t handle,
|
| 214 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 215 |
+
const void *x,
|
| 216 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 217 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 218 |
+
void *colBuffer);
|
| 219 |
+
|
| 220 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 221 |
+
cudnnReorderFilterAndBias(cudnnHandle_t handle,
|
| 222 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 223 |
+
cudnnReorderType_t reorderType,
|
| 224 |
+
const void *filterData,
|
| 225 |
+
void *reorderedFilterData,
|
| 226 |
+
int reorderBias,
|
| 227 |
+
const void *biasData,
|
| 228 |
+
void *reorderedBiasData);
|
| 229 |
+
|
| 230 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
|
| 231 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 232 |
+
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
|
| 233 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 234 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 235 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 236 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 237 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 238 |
+
size_t *sizeInBytes);
|
| 239 |
+
|
| 240 |
+
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 241 |
+
|
| 242 |
+
/* Function to perform the forward pass for batch convolution */
|
| 243 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 244 |
+
cudnnConvolutionForward(cudnnHandle_t handle,
|
| 245 |
+
const void *alpha,
|
| 246 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 247 |
+
const void *x,
|
| 248 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 249 |
+
const void *w,
|
| 250 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 251 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 252 |
+
void *workSpace,
|
| 253 |
+
size_t workSpaceSizeInBytes,
|
| 254 |
+
const void *beta,
|
| 255 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 256 |
+
void *y);
|
| 257 |
+
|
| 258 |
+
/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
|
| 259 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 260 |
+
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
|
| 261 |
+
const void *alpha1,
|
| 262 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 263 |
+
const void *x,
|
| 264 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 265 |
+
const void *w,
|
| 266 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 267 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 268 |
+
void *workSpace,
|
| 269 |
+
size_t workSpaceSizeInBytes,
|
| 270 |
+
const void *alpha2,
|
| 271 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 272 |
+
const void *z,
|
| 273 |
+
const cudnnTensorDescriptor_t biasDesc,
|
| 274 |
+
const void *bias,
|
| 275 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 276 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 277 |
+
void *y);
|
| 278 |
+
|
| 279 |
+
/* helper function to provide the convolution backward data algo that fit best the requirement */
|
| 280 |
+
|
| 281 |
+
typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
|
| 282 |
+
cudnnConvolutionBwdDataAlgo_t algo;
|
| 283 |
+
cudnnStatus_t status;
|
| 284 |
+
float time;
|
| 285 |
+
size_t memory;
|
| 286 |
+
cudnnDeterminism_t determinism;
|
| 287 |
+
cudnnMathType_t mathType;
|
| 288 |
+
int reserved[3];
|
| 289 |
+
} cudnnConvolutionBwdDataAlgoPerf_t CUDNN_DEPRECATED;
|
| 290 |
+
|
| 291 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 292 |
+
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 293 |
+
|
| 294 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 295 |
+
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
|
| 296 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 297 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 298 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 299 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 300 |
+
const int requestedAlgoCount,
|
| 301 |
+
int *returnedAlgoCount,
|
| 302 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
|
| 303 |
+
|
| 304 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 305 |
+
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
|
| 306 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 307 |
+
const void *w,
|
| 308 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 309 |
+
const void *dy,
|
| 310 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 311 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 312 |
+
void *dx,
|
| 313 |
+
const int requestedAlgoCount,
|
| 314 |
+
int *returnedAlgoCount,
|
| 315 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
|
| 316 |
+
void *workSpace,
|
| 317 |
+
size_t workSpaceSizeInBytes);
|
| 318 |
+
|
| 319 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 320 |
+
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
|
| 321 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 322 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 323 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 324 |
+
const cudnnTensorDescriptor_t gradDesc,
|
| 325 |
+
const int requestedAlgoCount,
|
| 326 |
+
int *returnedAlgoCount,
|
| 327 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
|
| 328 |
+
|
| 329 |
+
/*
|
| 330 |
+
* convolution algorithm (which requires potentially some workspace)
|
| 331 |
+
*/
|
| 332 |
+
|
| 333 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
|
| 334 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
|
| 336 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 337 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 338 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 339 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 340 |
+
cudnnConvolutionBwdDataAlgo_t algo,
|
| 341 |
+
size_t *sizeInBytes);
|
| 342 |
+
|
| 343 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 344 |
+
cudnnConvolutionBackwardData(cudnnHandle_t handle,
|
| 345 |
+
const void *alpha,
|
| 346 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 347 |
+
const void *w,
|
| 348 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 349 |
+
const void *dy,
|
| 350 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 351 |
+
cudnnConvolutionBwdDataAlgo_t algo,
|
| 352 |
+
void *workSpace,
|
| 353 |
+
size_t workSpaceSizeInBytes,
|
| 354 |
+
const void *beta,
|
| 355 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 356 |
+
void *dx);
|
| 357 |
+
|
| 358 |
+
/* Helper function to calculate folding descriptors for dgrad */
|
| 359 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 360 |
+
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
|
| 361 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 362 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 363 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 364 |
+
const cudnnTensorDescriptor_t gradDesc,
|
| 365 |
+
const cudnnTensorFormat_t transformFormat,
|
| 366 |
+
cudnnFilterDescriptor_t foldedFilterDesc,
|
| 367 |
+
cudnnTensorDescriptor_t paddedDiffDesc,
|
| 368 |
+
cudnnConvolutionDescriptor_t foldedConvDesc,
|
| 369 |
+
cudnnTensorDescriptor_t foldedGradDesc,
|
| 370 |
+
cudnnTensorTransformDescriptor_t filterFoldTransDesc,
|
| 371 |
+
cudnnTensorTransformDescriptor_t diffPadTransDesc,
|
| 372 |
+
cudnnTensorTransformDescriptor_t gradFoldTransDesc,
|
| 373 |
+
cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
|
| 374 |
+
|
| 375 |
+
/* cudnnFusedOps... */
|
| 376 |
+
struct cudnnFusedOpsConstParamStruct;
|
| 377 |
+
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t CUDNN_DEPRECATED;
|
| 378 |
+
|
| 379 |
+
struct cudnnFusedOpsVariantParamStruct;
|
| 380 |
+
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t CUDNN_DEPRECATED;
|
| 381 |
+
|
| 382 |
+
struct cudnnFusedOpsPlanStruct;
|
| 383 |
+
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t CUDNN_DEPRECATED;
|
| 384 |
+
|
| 385 |
+
typedef enum {
|
| 386 |
+
/* each op in [ ] can be disabled by passing NULL ptr */
|
| 387 |
+
/* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
|
| 388 |
+
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
|
| 389 |
+
/* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
|
| 390 |
+
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
|
| 391 |
+
/* utility for BN training in BN-conv fusion */
|
| 392 |
+
/* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
|
| 393 |
+
/* optionally update running stats and generate saved stats */
|
| 394 |
+
CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
|
| 395 |
+
/* utility for BN inference in BN-conv fusion */
|
| 396 |
+
/* computes the equivalent scale and bias from learned running stats and learned scale, bias */
|
| 397 |
+
CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
|
| 398 |
+
/* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
|
| 399 |
+
CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
|
| 400 |
+
/* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
|
| 401 |
+
CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
|
| 402 |
+
/* reserved for future use */
|
| 403 |
+
CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
|
| 404 |
+
} cudnnFusedOps_t CUDNN_DEPRECATED;
|
| 405 |
+
|
| 406 |
+
typedef enum {
|
| 407 |
+
/* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 408 |
+
/* get XDESC: pass previously created cudnnTensorDescriptor_t */
|
| 409 |
+
CUDNN_PARAM_XDESC = 0,
|
| 410 |
+
/* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 411 |
+
CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
|
| 412 |
+
/* set/get BN_MODE: pass cudnnBatchNormMode_t* */
|
| 413 |
+
CUDNN_PARAM_BN_MODE = 2,
|
| 414 |
+
/* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 415 |
+
/* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 416 |
+
CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
|
| 417 |
+
/* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 418 |
+
CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
|
| 419 |
+
/* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 420 |
+
CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
|
| 421 |
+
/* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
|
| 422 |
+
/* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
|
| 423 |
+
CUDNN_PARAM_ACTIVATION_DESC = 6,
|
| 424 |
+
/* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
|
| 425 |
+
/* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
|
| 426 |
+
CUDNN_PARAM_CONV_DESC = 7,
|
| 427 |
+
/* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
|
| 428 |
+
/* get WDESC: pass previously created cudnnFilterDescriptor_t */
|
| 429 |
+
CUDNN_PARAM_WDESC = 8,
|
| 430 |
+
/* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 431 |
+
CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
|
| 432 |
+
/* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
|
| 433 |
+
/* get DWDESC: pass previously created cudnnFilterDescriptor_t */
|
| 434 |
+
CUDNN_PARAM_DWDESC = 10,
|
| 435 |
+
/* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 436 |
+
CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
|
| 437 |
+
/* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 438 |
+
/* get YDESC: pass previously created cudnnTensorDescriptor_t */
|
| 439 |
+
CUDNN_PARAM_YDESC = 12,
|
| 440 |
+
/* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 441 |
+
CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
|
| 442 |
+
/* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 443 |
+
/* get DYDESC: pass previously created cudnnTensorDescriptor_t */
|
| 444 |
+
CUDNN_PARAM_DYDESC = 14,
|
| 445 |
+
/* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 446 |
+
CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
|
| 447 |
+
/* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 448 |
+
/* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 449 |
+
CUDNN_PARAM_YSTATS_DESC = 16,
|
| 450 |
+
/* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 451 |
+
CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
|
| 452 |
+
/* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 453 |
+
CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
|
| 454 |
+
/* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 455 |
+
/* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 456 |
+
CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
|
| 457 |
+
/* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 458 |
+
CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
|
| 459 |
+
/* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 460 |
+
CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
|
| 461 |
+
/* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 462 |
+
CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
|
| 463 |
+
/* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 464 |
+
CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
|
| 465 |
+
/* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 466 |
+
CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
|
| 467 |
+
/* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 468 |
+
CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
|
| 469 |
+
|
| 470 |
+
/* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 471 |
+
/* get ZDESC: pass previously created cudnnTensorDescriptor_t */
|
| 472 |
+
CUDNN_PARAM_ZDESC = 26,
|
| 473 |
+
/* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 474 |
+
CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
|
| 475 |
+
/* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 476 |
+
/* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 477 |
+
CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
|
| 478 |
+
/* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 479 |
+
CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
|
| 480 |
+
/* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 481 |
+
CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
|
| 482 |
+
|
| 483 |
+
/* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 484 |
+
/* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 485 |
+
CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
|
| 486 |
+
/* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 487 |
+
CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
|
| 488 |
+
|
| 489 |
+
/* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 490 |
+
/* get DXDESC: pass previously created cudnnTensorDescriptor_t */
|
| 491 |
+
CUDNN_PARAM_DXDESC = 33,
|
| 492 |
+
/* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 493 |
+
CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
|
| 494 |
+
/* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 495 |
+
/* get DZDESC: pass previously created cudnnTensorDescriptor_t */
|
| 496 |
+
CUDNN_PARAM_DZDESC = 35,
|
| 497 |
+
/* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 498 |
+
CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
|
| 499 |
+
/* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 500 |
+
CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
|
| 501 |
+
/* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 502 |
+
CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
|
| 503 |
+
} cudnnFusedOpsConstParamLabel_t CUDNN_DEPRECATED;
|
| 504 |
+
|
| 505 |
+
typedef enum {
|
| 506 |
+
CUDNN_PTR_NULL = 0,
|
| 507 |
+
CUDNN_PTR_ELEM_ALIGNED = 1,
|
| 508 |
+
CUDNN_PTR_16B_ALIGNED = 2,
|
| 509 |
+
} cudnnFusedOpsPointerPlaceHolder_t CUDNN_DEPRECATED;
|
| 510 |
+
|
| 511 |
+
typedef enum {
|
| 512 |
+
/* set: pass void* pointing to dev memory */
|
| 513 |
+
/* get: pass void** pointing to host memory */
|
| 514 |
+
CUDNN_PTR_XDATA = 0,
|
| 515 |
+
CUDNN_PTR_BN_EQSCALE = 1,
|
| 516 |
+
CUDNN_PTR_BN_EQBIAS = 2,
|
| 517 |
+
CUDNN_PTR_WDATA = 3,
|
| 518 |
+
CUDNN_PTR_DWDATA = 4,
|
| 519 |
+
CUDNN_PTR_YDATA = 5,
|
| 520 |
+
CUDNN_PTR_DYDATA = 6,
|
| 521 |
+
CUDNN_PTR_YSUM = 7,
|
| 522 |
+
CUDNN_PTR_YSQSUM = 8,
|
| 523 |
+
CUDNN_PTR_WORKSPACE = 9,
|
| 524 |
+
CUDNN_PTR_BN_SCALE = 10,
|
| 525 |
+
CUDNN_PTR_BN_BIAS = 11,
|
| 526 |
+
CUDNN_PTR_BN_SAVED_MEAN = 12,
|
| 527 |
+
CUDNN_PTR_BN_SAVED_INVSTD = 13,
|
| 528 |
+
CUDNN_PTR_BN_RUNNING_MEAN = 14,
|
| 529 |
+
CUDNN_PTR_BN_RUNNING_VAR = 15,
|
| 530 |
+
CUDNN_PTR_ZDATA = 16,
|
| 531 |
+
CUDNN_PTR_BN_Z_EQSCALE = 17,
|
| 532 |
+
CUDNN_PTR_BN_Z_EQBIAS = 18,
|
| 533 |
+
CUDNN_PTR_ACTIVATION_BITMASK = 19,
|
| 534 |
+
CUDNN_PTR_DXDATA = 20,
|
| 535 |
+
CUDNN_PTR_DZDATA = 21,
|
| 536 |
+
CUDNN_PTR_BN_DSCALE = 22,
|
| 537 |
+
CUDNN_PTR_BN_DBIAS = 23,
|
| 538 |
+
|
| 539 |
+
/* set/get: pass size_t* pointing to host memory */
|
| 540 |
+
CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
|
| 541 |
+
/* set/get: pass int64_t* pointing to host memory */
|
| 542 |
+
CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
|
| 543 |
+
/* set/get: pass double* pointing to host memory */
|
| 544 |
+
CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
|
| 545 |
+
/* set/get: pass double* pointing to host memory */
|
| 546 |
+
CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
|
| 547 |
+
} cudnnFusedOpsVariantParamLabel_t CUDNN_DEPRECATED;
|
| 548 |
+
|
| 549 |
+
cudnnStatus_t CUDNNWINAPI
|
| 550 |
+
cudnnCnnVersionCheck(void);
|
| 551 |
+
|
| 552 |
+
/* helper function to provide the convolution backward filter algo that fit best the requirement */
|
| 553 |
+
|
| 554 |
+
typedef struct cudnnConvolutionBwdFilterAlgoPerfStruct {
|
| 555 |
+
cudnnConvolutionBwdFilterAlgo_t algo;
|
| 556 |
+
cudnnStatus_t status;
|
| 557 |
+
float time;
|
| 558 |
+
size_t memory;
|
| 559 |
+
cudnnDeterminism_t determinism;
|
| 560 |
+
cudnnMathType_t mathType;
|
| 561 |
+
int reserved[3];
|
| 562 |
+
} cudnnConvolutionBwdFilterAlgoPerf_t CUDNN_DEPRECATED;
|
| 563 |
+
|
| 564 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 565 |
+
cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 566 |
+
|
| 567 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 568 |
+
cudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
|
| 569 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 570 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 571 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 572 |
+
const cudnnFilterDescriptor_t dwDesc,
|
| 573 |
+
const int requestedAlgoCount,
|
| 574 |
+
int *returnedAlgoCount,
|
| 575 |
+
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);
|
| 576 |
+
|
| 577 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 578 |
+
cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle,
|
| 579 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 580 |
+
const void *x,
|
| 581 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 582 |
+
const void *y,
|
| 583 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 584 |
+
const cudnnFilterDescriptor_t dwDesc,
|
| 585 |
+
void *dw,
|
| 586 |
+
const int requestedAlgoCount,
|
| 587 |
+
int *returnedAlgoCount,
|
| 588 |
+
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
|
| 589 |
+
void *workSpace,
|
| 590 |
+
size_t workSpaceSizeInBytes);
|
| 591 |
+
|
| 592 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 593 |
+
cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
|
| 594 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 595 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 596 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 597 |
+
const cudnnFilterDescriptor_t gradDesc,
|
| 598 |
+
const int requestedAlgoCount,
|
| 599 |
+
int *returnedAlgoCount,
|
| 600 |
+
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);
|
| 601 |
+
|
| 602 |
+
/*
|
| 603 |
+
* convolution algorithm (which requires potentially some workspace)
|
| 604 |
+
*/
|
| 605 |
+
|
| 606 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
|
| 607 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 608 |
+
cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle,
|
| 609 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 610 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 611 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 612 |
+
const cudnnFilterDescriptor_t gradDesc,
|
| 613 |
+
cudnnConvolutionBwdFilterAlgo_t algo,
|
| 614 |
+
size_t *sizeInBytes);
|
| 615 |
+
|
| 616 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 617 |
+
cudnnConvolutionBackwardFilter(cudnnHandle_t handle,
|
| 618 |
+
const void *alpha,
|
| 619 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 620 |
+
const void *x,
|
| 621 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 622 |
+
const void *dy,
|
| 623 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 624 |
+
cudnnConvolutionBwdFilterAlgo_t algo,
|
| 625 |
+
void *workSpace,
|
| 626 |
+
size_t workSpaceSizeInBytes,
|
| 627 |
+
const void *beta,
|
| 628 |
+
const cudnnFilterDescriptor_t dwDesc,
|
| 629 |
+
void *dw);
|
| 630 |
+
|
| 631 |
+
/* Function to compute the bias gradient for batch convolution */
|
| 632 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 633 |
+
cudnnConvolutionBackwardBias(cudnnHandle_t handle,
|
| 634 |
+
const void *alpha,
|
| 635 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 636 |
+
const void *dy,
|
| 637 |
+
const void *beta,
|
| 638 |
+
const cudnnTensorDescriptor_t dbDesc,
|
| 639 |
+
void *db);
|
| 640 |
+
|
| 641 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 642 |
+
cudnnCreateFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops);
|
| 643 |
+
|
| 644 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 645 |
+
cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack);
|
| 646 |
+
|
| 647 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 648 |
+
cudnnSetFusedOpsConstParamPackAttribute(cudnnFusedOpsConstParamPack_t constPack,
|
| 649 |
+
cudnnFusedOpsConstParamLabel_t paramLabel,
|
| 650 |
+
const void *param);
|
| 651 |
+
|
| 652 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 653 |
+
cudnnGetFusedOpsConstParamPackAttribute(const cudnnFusedOpsConstParamPack_t constPack,
|
| 654 |
+
cudnnFusedOpsConstParamLabel_t paramLabel,
|
| 655 |
+
void *param,
|
| 656 |
+
int *isNULL);
|
| 657 |
+
|
| 658 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 659 |
+
cudnnCreateFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops);
|
| 660 |
+
|
| 661 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 662 |
+
cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack);
|
| 663 |
+
|
| 664 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 665 |
+
cudnnSetFusedOpsVariantParamPackAttribute(cudnnFusedOpsVariantParamPack_t varPack,
|
| 666 |
+
cudnnFusedOpsVariantParamLabel_t paramLabel,
|
| 667 |
+
void *ptr);
|
| 668 |
+
|
| 669 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 670 |
+
cudnnGetFusedOpsVariantParamPackAttribute(const cudnnFusedOpsVariantParamPack_t varPack,
|
| 671 |
+
cudnnFusedOpsVariantParamLabel_t paramLabel,
|
| 672 |
+
void *ptr);
|
| 673 |
+
|
| 674 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 675 |
+
cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, cudnnFusedOps_t ops);
|
| 676 |
+
|
| 677 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 678 |
+
cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan);
|
| 679 |
+
|
| 680 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 681 |
+
cudnnMakeFusedOpsPlan(cudnnHandle_t handle,
|
| 682 |
+
cudnnFusedOpsPlan_t plan,
|
| 683 |
+
const cudnnFusedOpsConstParamPack_t constPack,
|
| 684 |
+
size_t *workspaceSizeInBytes);
|
| 685 |
+
|
| 686 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 687 |
+
cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, cudnnFusedOpsVariantParamPack_t varPack);
|
| 688 |
+
|
| 689 |
+
#if defined(__cplusplus)
|
| 690 |
+
}
|
| 691 |
+
#endif
|
| 692 |
+
|
| 693 |
+
#endif /* CUDNN_CNN_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_cnn_v9.h
ADDED
|
@@ -0,0 +1,693 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_cnn : cuDNN's basic definitions and CNN functions.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_CNN_H_)
|
| 55 |
+
#define CUDNN_CNN_H_
|
| 56 |
+
|
| 57 |
+
#pragma once
|
| 58 |
+
#include <stdint.h>
|
| 59 |
+
|
| 60 |
+
#include "cudnn_version.h"
|
| 61 |
+
#include "cudnn_ops.h"
|
| 62 |
+
|
| 63 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 64 |
+
#define CUDNN_CNN_MAJOR 9
|
| 65 |
+
#define CUDNN_CNN_MINOR 10
|
| 66 |
+
#define CUDNN_CNN_PATCH 2
|
| 67 |
+
|
| 68 |
+
#if (CUDNN_CNN_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_MINOR != CUDNN_MINOR) || (CUDNN_CNN_PATCH != CUDNN_PATCHLEVEL)
|
| 69 |
+
#error Version mismatch in cuDNN CNN INFER!!!
|
| 70 |
+
#endif
|
| 71 |
+
|
| 72 |
+
#if defined(__cplusplus)
|
| 73 |
+
extern "C" {
|
| 74 |
+
#endif
|
| 75 |
+
|
| 76 |
+
typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t CUDNN_DEPRECATED;
|
| 77 |
+
|
| 78 |
+
typedef struct cudnnConvolutionFwdAlgoPerfStruct {
|
| 79 |
+
cudnnConvolutionFwdAlgo_t algo;
|
| 80 |
+
cudnnStatus_t status;
|
| 81 |
+
float time;
|
| 82 |
+
size_t memory;
|
| 83 |
+
cudnnDeterminism_t determinism;
|
| 84 |
+
cudnnMathType_t mathType;
|
| 85 |
+
int reserved[3];
|
| 86 |
+
} cudnnConvolutionFwdAlgoPerf_t CUDNN_DEPRECATED;
|
| 87 |
+
|
| 88 |
+
/* Create an instance of convolution descriptor */
|
| 89 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 90 |
+
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
|
| 91 |
+
|
| 92 |
+
/* Destroy an instance of convolution descriptor */
|
| 93 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 94 |
+
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
|
| 95 |
+
|
| 96 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 97 |
+
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
|
| 98 |
+
|
| 99 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 100 |
+
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
|
| 101 |
+
|
| 102 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 103 |
+
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
|
| 104 |
+
|
| 105 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 106 |
+
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
|
| 107 |
+
|
| 108 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 109 |
+
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
|
| 110 |
+
|
| 111 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 112 |
+
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
|
| 113 |
+
|
| 114 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 115 |
+
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
|
| 116 |
+
int pad_h, /* zero-padding height */
|
| 117 |
+
int pad_w, /* zero-padding width */
|
| 118 |
+
int u, /* vertical filter stride */
|
| 119 |
+
int v, /* horizontal filter stride */
|
| 120 |
+
int dilation_h, /* filter dilation in the vertical dimension */
|
| 121 |
+
int dilation_w, /* filter dilation in the horizontal dimension */
|
| 122 |
+
cudnnConvolutionMode_t mode,
|
| 123 |
+
cudnnDataType_t computeType);
|
| 124 |
+
|
| 125 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 126 |
+
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
|
| 127 |
+
int *pad_h, /* zero-padding height */
|
| 128 |
+
int *pad_w, /* zero-padding width */
|
| 129 |
+
int *u, /* vertical filter stride */
|
| 130 |
+
int *v, /* horizontal filter stride */
|
| 131 |
+
int *dilation_h, /* filter dilation in the vertical dimension */
|
| 132 |
+
int *dilation_w, /* filter dilation in the horizontal dimension */
|
| 133 |
+
cudnnConvolutionMode_t *mode,
|
| 134 |
+
cudnnDataType_t *computeType);
|
| 135 |
+
|
| 136 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 137 |
+
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
|
| 138 |
+
int arrayLength, /* nbDims-2 size */
|
| 139 |
+
const int padA[],
|
| 140 |
+
const int filterStrideA[],
|
| 141 |
+
const int dilationA[],
|
| 142 |
+
cudnnConvolutionMode_t mode,
|
| 143 |
+
cudnnDataType_t computeType); /* convolution data type */
|
| 144 |
+
|
| 145 |
+
/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
|
| 146 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 147 |
+
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
|
| 148 |
+
int arrayLengthRequested,
|
| 149 |
+
int *arrayLength,
|
| 150 |
+
int padA[],
|
| 151 |
+
int strideA[],
|
| 152 |
+
int dilationA[],
|
| 153 |
+
cudnnConvolutionMode_t *mode,
|
| 154 |
+
cudnnDataType_t *computeType); /* convolution data type */
|
| 155 |
+
|
| 156 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 157 |
+
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
|
| 158 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 159 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 160 |
+
int *n,
|
| 161 |
+
int *c,
|
| 162 |
+
int *h,
|
| 163 |
+
int *w);
|
| 164 |
+
|
| 165 |
+
/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
|
| 166 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 167 |
+
cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
|
| 168 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 169 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 170 |
+
int nbDims,
|
| 171 |
+
int tensorOuputDimA[]);
|
| 172 |
+
|
| 173 |
+
/* helper function to provide the convolution forward algo that fit best the requirement */
|
| 174 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 175 |
+
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 176 |
+
|
| 177 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 178 |
+
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
|
| 179 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 180 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 181 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 182 |
+
const cudnnTensorDescriptor_t destDesc,
|
| 183 |
+
const int requestedAlgoCount,
|
| 184 |
+
int *returnedAlgoCount,
|
| 185 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults);
|
| 186 |
+
|
| 187 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 188 |
+
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
|
| 189 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 190 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 191 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 192 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 193 |
+
const int requestedAlgoCount,
|
| 194 |
+
int *returnedAlgoCount,
|
| 195 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults);
|
| 196 |
+
|
| 197 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 198 |
+
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
|
| 199 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 200 |
+
const void *x,
|
| 201 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 202 |
+
const void *w,
|
| 203 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 204 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 205 |
+
void *y,
|
| 206 |
+
const int requestedAlgoCount,
|
| 207 |
+
int *returnedAlgoCount,
|
| 208 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults,
|
| 209 |
+
void *workSpace,
|
| 210 |
+
size_t workSpaceSizeInBytes);
|
| 211 |
+
|
| 212 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 213 |
+
cudnnIm2Col(cudnnHandle_t handle,
|
| 214 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 215 |
+
const void *x,
|
| 216 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 217 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 218 |
+
void *colBuffer);
|
| 219 |
+
|
| 220 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 221 |
+
cudnnReorderFilterAndBias(cudnnHandle_t handle,
|
| 222 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 223 |
+
cudnnReorderType_t reorderType,
|
| 224 |
+
const void *filterData,
|
| 225 |
+
void *reorderedFilterData,
|
| 226 |
+
int reorderBias,
|
| 227 |
+
const void *biasData,
|
| 228 |
+
void *reorderedBiasData);
|
| 229 |
+
|
| 230 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
|
| 231 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 232 |
+
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
|
| 233 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 234 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 235 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 236 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 237 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 238 |
+
size_t *sizeInBytes);
|
| 239 |
+
|
| 240 |
+
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 241 |
+
|
| 242 |
+
/* Function to perform the forward pass for batch convolution */
|
| 243 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 244 |
+
cudnnConvolutionForward(cudnnHandle_t handle,
|
| 245 |
+
const void *alpha,
|
| 246 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 247 |
+
const void *x,
|
| 248 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 249 |
+
const void *w,
|
| 250 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 251 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 252 |
+
void *workSpace,
|
| 253 |
+
size_t workSpaceSizeInBytes,
|
| 254 |
+
const void *beta,
|
| 255 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 256 |
+
void *y);
|
| 257 |
+
|
| 258 |
+
/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
|
| 259 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 260 |
+
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
|
| 261 |
+
const void *alpha1,
|
| 262 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 263 |
+
const void *x,
|
| 264 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 265 |
+
const void *w,
|
| 266 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 267 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 268 |
+
void *workSpace,
|
| 269 |
+
size_t workSpaceSizeInBytes,
|
| 270 |
+
const void *alpha2,
|
| 271 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 272 |
+
const void *z,
|
| 273 |
+
const cudnnTensorDescriptor_t biasDesc,
|
| 274 |
+
const void *bias,
|
| 275 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 276 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 277 |
+
void *y);
|
| 278 |
+
|
| 279 |
+
/* helper function to provide the convolution backward data algo that fit best the requirement */
|
| 280 |
+
|
| 281 |
+
typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
|
| 282 |
+
cudnnConvolutionBwdDataAlgo_t algo;
|
| 283 |
+
cudnnStatus_t status;
|
| 284 |
+
float time;
|
| 285 |
+
size_t memory;
|
| 286 |
+
cudnnDeterminism_t determinism;
|
| 287 |
+
cudnnMathType_t mathType;
|
| 288 |
+
int reserved[3];
|
| 289 |
+
} cudnnConvolutionBwdDataAlgoPerf_t CUDNN_DEPRECATED;
|
| 290 |
+
|
| 291 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 292 |
+
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 293 |
+
|
| 294 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 295 |
+
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
|
| 296 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 297 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 298 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 299 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 300 |
+
const int requestedAlgoCount,
|
| 301 |
+
int *returnedAlgoCount,
|
| 302 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
|
| 303 |
+
|
| 304 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 305 |
+
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
|
| 306 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 307 |
+
const void *w,
|
| 308 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 309 |
+
const void *dy,
|
| 310 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 311 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 312 |
+
void *dx,
|
| 313 |
+
const int requestedAlgoCount,
|
| 314 |
+
int *returnedAlgoCount,
|
| 315 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
|
| 316 |
+
void *workSpace,
|
| 317 |
+
size_t workSpaceSizeInBytes);
|
| 318 |
+
|
| 319 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 320 |
+
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
|
| 321 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 322 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 323 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 324 |
+
const cudnnTensorDescriptor_t gradDesc,
|
| 325 |
+
const int requestedAlgoCount,
|
| 326 |
+
int *returnedAlgoCount,
|
| 327 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
|
| 328 |
+
|
| 329 |
+
/*
|
| 330 |
+
* convolution algorithm (which requires potentially some workspace)
|
| 331 |
+
*/
|
| 332 |
+
|
| 333 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
|
| 334 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
|
| 336 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 337 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 338 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 339 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 340 |
+
cudnnConvolutionBwdDataAlgo_t algo,
|
| 341 |
+
size_t *sizeInBytes);
|
| 342 |
+
|
| 343 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 344 |
+
cudnnConvolutionBackwardData(cudnnHandle_t handle,
|
| 345 |
+
const void *alpha,
|
| 346 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 347 |
+
const void *w,
|
| 348 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 349 |
+
const void *dy,
|
| 350 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 351 |
+
cudnnConvolutionBwdDataAlgo_t algo,
|
| 352 |
+
void *workSpace,
|
| 353 |
+
size_t workSpaceSizeInBytes,
|
| 354 |
+
const void *beta,
|
| 355 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 356 |
+
void *dx);
|
| 357 |
+
|
| 358 |
+
/* Helper function to calculate folding descriptors for dgrad */
|
| 359 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 360 |
+
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
|
| 361 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 362 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 363 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 364 |
+
const cudnnTensorDescriptor_t gradDesc,
|
| 365 |
+
const cudnnTensorFormat_t transformFormat,
|
| 366 |
+
cudnnFilterDescriptor_t foldedFilterDesc,
|
| 367 |
+
cudnnTensorDescriptor_t paddedDiffDesc,
|
| 368 |
+
cudnnConvolutionDescriptor_t foldedConvDesc,
|
| 369 |
+
cudnnTensorDescriptor_t foldedGradDesc,
|
| 370 |
+
cudnnTensorTransformDescriptor_t filterFoldTransDesc,
|
| 371 |
+
cudnnTensorTransformDescriptor_t diffPadTransDesc,
|
| 372 |
+
cudnnTensorTransformDescriptor_t gradFoldTransDesc,
|
| 373 |
+
cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
|
| 374 |
+
|
| 375 |
+
/* cudnnFusedOps... */
|
| 376 |
+
struct cudnnFusedOpsConstParamStruct;
|
| 377 |
+
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t CUDNN_DEPRECATED;
|
| 378 |
+
|
| 379 |
+
struct cudnnFusedOpsVariantParamStruct;
|
| 380 |
+
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t CUDNN_DEPRECATED;
|
| 381 |
+
|
| 382 |
+
struct cudnnFusedOpsPlanStruct;
|
| 383 |
+
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t CUDNN_DEPRECATED;
|
| 384 |
+
|
| 385 |
+
typedef enum {
|
| 386 |
+
/* each op in [ ] can be disabled by passing NULL ptr */
|
| 387 |
+
/* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
|
| 388 |
+
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
|
| 389 |
+
/* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
|
| 390 |
+
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
|
| 391 |
+
/* utility for BN training in BN-conv fusion */
|
| 392 |
+
/* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
|
| 393 |
+
/* optionally update running stats and generate saved stats */
|
| 394 |
+
CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
|
| 395 |
+
/* utility for BN inference in BN-conv fusion */
|
| 396 |
+
/* computes the equivalent scale and bias from learned running stats and learned scale, bias */
|
| 397 |
+
CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
|
| 398 |
+
/* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
|
| 399 |
+
CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
|
| 400 |
+
/* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
|
| 401 |
+
CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
|
| 402 |
+
/* reserved for future use */
|
| 403 |
+
CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
|
| 404 |
+
} cudnnFusedOps_t CUDNN_DEPRECATED;
|
| 405 |
+
|
| 406 |
+
typedef enum {
|
| 407 |
+
/* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 408 |
+
/* get XDESC: pass previously created cudnnTensorDescriptor_t */
|
| 409 |
+
CUDNN_PARAM_XDESC = 0,
|
| 410 |
+
/* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 411 |
+
CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
|
| 412 |
+
/* set/get BN_MODE: pass cudnnBatchNormMode_t* */
|
| 413 |
+
CUDNN_PARAM_BN_MODE = 2,
|
| 414 |
+
/* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 415 |
+
/* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 416 |
+
CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
|
| 417 |
+
/* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 418 |
+
CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
|
| 419 |
+
/* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 420 |
+
CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
|
| 421 |
+
/* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
|
| 422 |
+
/* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
|
| 423 |
+
CUDNN_PARAM_ACTIVATION_DESC = 6,
|
| 424 |
+
/* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
|
| 425 |
+
/* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
|
| 426 |
+
CUDNN_PARAM_CONV_DESC = 7,
|
| 427 |
+
/* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
|
| 428 |
+
/* get WDESC: pass previously created cudnnFilterDescriptor_t */
|
| 429 |
+
CUDNN_PARAM_WDESC = 8,
|
| 430 |
+
/* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 431 |
+
CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
|
| 432 |
+
/* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
|
| 433 |
+
/* get DWDESC: pass previously created cudnnFilterDescriptor_t */
|
| 434 |
+
CUDNN_PARAM_DWDESC = 10,
|
| 435 |
+
/* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 436 |
+
CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
|
| 437 |
+
/* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 438 |
+
/* get YDESC: pass previously created cudnnTensorDescriptor_t */
|
| 439 |
+
CUDNN_PARAM_YDESC = 12,
|
| 440 |
+
/* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 441 |
+
CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
|
| 442 |
+
/* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 443 |
+
/* get DYDESC: pass previously created cudnnTensorDescriptor_t */
|
| 444 |
+
CUDNN_PARAM_DYDESC = 14,
|
| 445 |
+
/* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 446 |
+
CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
|
| 447 |
+
/* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 448 |
+
/* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 449 |
+
CUDNN_PARAM_YSTATS_DESC = 16,
|
| 450 |
+
/* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 451 |
+
CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
|
| 452 |
+
/* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 453 |
+
CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
|
| 454 |
+
/* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 455 |
+
/* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 456 |
+
CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
|
| 457 |
+
/* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 458 |
+
CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
|
| 459 |
+
/* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 460 |
+
CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
|
| 461 |
+
/* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 462 |
+
CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
|
| 463 |
+
/* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 464 |
+
CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
|
| 465 |
+
/* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 466 |
+
CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
|
| 467 |
+
/* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 468 |
+
CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
|
| 469 |
+
|
| 470 |
+
/* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 471 |
+
/* get ZDESC: pass previously created cudnnTensorDescriptor_t */
|
| 472 |
+
CUDNN_PARAM_ZDESC = 26,
|
| 473 |
+
/* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 474 |
+
CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
|
| 475 |
+
/* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 476 |
+
/* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 477 |
+
CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
|
| 478 |
+
/* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 479 |
+
CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
|
| 480 |
+
/* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 481 |
+
CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
|
| 482 |
+
|
| 483 |
+
/* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 484 |
+
/* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 485 |
+
CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
|
| 486 |
+
/* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 487 |
+
CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
|
| 488 |
+
|
| 489 |
+
/* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 490 |
+
/* get DXDESC: pass previously created cudnnTensorDescriptor_t */
|
| 491 |
+
CUDNN_PARAM_DXDESC = 33,
|
| 492 |
+
/* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 493 |
+
CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
|
| 494 |
+
/* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 495 |
+
/* get DZDESC: pass previously created cudnnTensorDescriptor_t */
|
| 496 |
+
CUDNN_PARAM_DZDESC = 35,
|
| 497 |
+
/* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 498 |
+
CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
|
| 499 |
+
/* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 500 |
+
CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
|
| 501 |
+
/* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 502 |
+
CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
|
| 503 |
+
} cudnnFusedOpsConstParamLabel_t CUDNN_DEPRECATED;
|
| 504 |
+
|
| 505 |
+
typedef enum {
|
| 506 |
+
CUDNN_PTR_NULL = 0,
|
| 507 |
+
CUDNN_PTR_ELEM_ALIGNED = 1,
|
| 508 |
+
CUDNN_PTR_16B_ALIGNED = 2,
|
| 509 |
+
} cudnnFusedOpsPointerPlaceHolder_t CUDNN_DEPRECATED;
|
| 510 |
+
|
| 511 |
+
typedef enum {
|
| 512 |
+
/* set: pass void* pointing to dev memory */
|
| 513 |
+
/* get: pass void** pointing to host memory */
|
| 514 |
+
CUDNN_PTR_XDATA = 0,
|
| 515 |
+
CUDNN_PTR_BN_EQSCALE = 1,
|
| 516 |
+
CUDNN_PTR_BN_EQBIAS = 2,
|
| 517 |
+
CUDNN_PTR_WDATA = 3,
|
| 518 |
+
CUDNN_PTR_DWDATA = 4,
|
| 519 |
+
CUDNN_PTR_YDATA = 5,
|
| 520 |
+
CUDNN_PTR_DYDATA = 6,
|
| 521 |
+
CUDNN_PTR_YSUM = 7,
|
| 522 |
+
CUDNN_PTR_YSQSUM = 8,
|
| 523 |
+
CUDNN_PTR_WORKSPACE = 9,
|
| 524 |
+
CUDNN_PTR_BN_SCALE = 10,
|
| 525 |
+
CUDNN_PTR_BN_BIAS = 11,
|
| 526 |
+
CUDNN_PTR_BN_SAVED_MEAN = 12,
|
| 527 |
+
CUDNN_PTR_BN_SAVED_INVSTD = 13,
|
| 528 |
+
CUDNN_PTR_BN_RUNNING_MEAN = 14,
|
| 529 |
+
CUDNN_PTR_BN_RUNNING_VAR = 15,
|
| 530 |
+
CUDNN_PTR_ZDATA = 16,
|
| 531 |
+
CUDNN_PTR_BN_Z_EQSCALE = 17,
|
| 532 |
+
CUDNN_PTR_BN_Z_EQBIAS = 18,
|
| 533 |
+
CUDNN_PTR_ACTIVATION_BITMASK = 19,
|
| 534 |
+
CUDNN_PTR_DXDATA = 20,
|
| 535 |
+
CUDNN_PTR_DZDATA = 21,
|
| 536 |
+
CUDNN_PTR_BN_DSCALE = 22,
|
| 537 |
+
CUDNN_PTR_BN_DBIAS = 23,
|
| 538 |
+
|
| 539 |
+
/* set/get: pass size_t* pointing to host memory */
|
| 540 |
+
CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
|
| 541 |
+
/* set/get: pass int64_t* pointing to host memory */
|
| 542 |
+
CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
|
| 543 |
+
/* set/get: pass double* pointing to host memory */
|
| 544 |
+
CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
|
| 545 |
+
/* set/get: pass double* pointing to host memory */
|
| 546 |
+
CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
|
| 547 |
+
} cudnnFusedOpsVariantParamLabel_t CUDNN_DEPRECATED;
|
| 548 |
+
|
| 549 |
+
cudnnStatus_t CUDNNWINAPI
|
| 550 |
+
cudnnCnnVersionCheck(void);
|
| 551 |
+
|
| 552 |
+
/* helper function to provide the convolution backward filter algo that fit best the requirement */
|
| 553 |
+
|
| 554 |
+
typedef struct cudnnConvolutionBwdFilterAlgoPerfStruct {
|
| 555 |
+
cudnnConvolutionBwdFilterAlgo_t algo;
|
| 556 |
+
cudnnStatus_t status;
|
| 557 |
+
float time;
|
| 558 |
+
size_t memory;
|
| 559 |
+
cudnnDeterminism_t determinism;
|
| 560 |
+
cudnnMathType_t mathType;
|
| 561 |
+
int reserved[3];
|
| 562 |
+
} cudnnConvolutionBwdFilterAlgoPerf_t CUDNN_DEPRECATED;
|
| 563 |
+
|
| 564 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 565 |
+
cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 566 |
+
|
| 567 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 568 |
+
cudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
|
| 569 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 570 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 571 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 572 |
+
const cudnnFilterDescriptor_t dwDesc,
|
| 573 |
+
const int requestedAlgoCount,
|
| 574 |
+
int *returnedAlgoCount,
|
| 575 |
+
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);
|
| 576 |
+
|
| 577 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 578 |
+
cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle,
|
| 579 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 580 |
+
const void *x,
|
| 581 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 582 |
+
const void *y,
|
| 583 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 584 |
+
const cudnnFilterDescriptor_t dwDesc,
|
| 585 |
+
void *dw,
|
| 586 |
+
const int requestedAlgoCount,
|
| 587 |
+
int *returnedAlgoCount,
|
| 588 |
+
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
|
| 589 |
+
void *workSpace,
|
| 590 |
+
size_t workSpaceSizeInBytes);
|
| 591 |
+
|
| 592 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 593 |
+
cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
|
| 594 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 595 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 596 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 597 |
+
const cudnnFilterDescriptor_t gradDesc,
|
| 598 |
+
const int requestedAlgoCount,
|
| 599 |
+
int *returnedAlgoCount,
|
| 600 |
+
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);
|
| 601 |
+
|
| 602 |
+
/*
|
| 603 |
+
* convolution algorithm (which requires potentially some workspace)
|
| 604 |
+
*/
|
| 605 |
+
|
| 606 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
|
| 607 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 608 |
+
cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle,
|
| 609 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 610 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 611 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 612 |
+
const cudnnFilterDescriptor_t gradDesc,
|
| 613 |
+
cudnnConvolutionBwdFilterAlgo_t algo,
|
| 614 |
+
size_t *sizeInBytes);
|
| 615 |
+
|
| 616 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 617 |
+
cudnnConvolutionBackwardFilter(cudnnHandle_t handle,
|
| 618 |
+
const void *alpha,
|
| 619 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 620 |
+
const void *x,
|
| 621 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 622 |
+
const void *dy,
|
| 623 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 624 |
+
cudnnConvolutionBwdFilterAlgo_t algo,
|
| 625 |
+
void *workSpace,
|
| 626 |
+
size_t workSpaceSizeInBytes,
|
| 627 |
+
const void *beta,
|
| 628 |
+
const cudnnFilterDescriptor_t dwDesc,
|
| 629 |
+
void *dw);
|
| 630 |
+
|
| 631 |
+
/* Function to compute the bias gradient for batch convolution */
|
| 632 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 633 |
+
cudnnConvolutionBackwardBias(cudnnHandle_t handle,
|
| 634 |
+
const void *alpha,
|
| 635 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 636 |
+
const void *dy,
|
| 637 |
+
const void *beta,
|
| 638 |
+
const cudnnTensorDescriptor_t dbDesc,
|
| 639 |
+
void *db);
|
| 640 |
+
|
| 641 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 642 |
+
cudnnCreateFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops);
|
| 643 |
+
|
| 644 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 645 |
+
cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack);
|
| 646 |
+
|
| 647 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 648 |
+
cudnnSetFusedOpsConstParamPackAttribute(cudnnFusedOpsConstParamPack_t constPack,
|
| 649 |
+
cudnnFusedOpsConstParamLabel_t paramLabel,
|
| 650 |
+
const void *param);
|
| 651 |
+
|
| 652 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 653 |
+
cudnnGetFusedOpsConstParamPackAttribute(const cudnnFusedOpsConstParamPack_t constPack,
|
| 654 |
+
cudnnFusedOpsConstParamLabel_t paramLabel,
|
| 655 |
+
void *param,
|
| 656 |
+
int *isNULL);
|
| 657 |
+
|
| 658 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 659 |
+
cudnnCreateFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops);
|
| 660 |
+
|
| 661 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 662 |
+
cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack);
|
| 663 |
+
|
| 664 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 665 |
+
cudnnSetFusedOpsVariantParamPackAttribute(cudnnFusedOpsVariantParamPack_t varPack,
|
| 666 |
+
cudnnFusedOpsVariantParamLabel_t paramLabel,
|
| 667 |
+
void *ptr);
|
| 668 |
+
|
| 669 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 670 |
+
cudnnGetFusedOpsVariantParamPackAttribute(const cudnnFusedOpsVariantParamPack_t varPack,
|
| 671 |
+
cudnnFusedOpsVariantParamLabel_t paramLabel,
|
| 672 |
+
void *ptr);
|
| 673 |
+
|
| 674 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 675 |
+
cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, cudnnFusedOps_t ops);
|
| 676 |
+
|
| 677 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 678 |
+
cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan);
|
| 679 |
+
|
| 680 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 681 |
+
cudnnMakeFusedOpsPlan(cudnnHandle_t handle,
|
| 682 |
+
cudnnFusedOpsPlan_t plan,
|
| 683 |
+
const cudnnFusedOpsConstParamPack_t constPack,
|
| 684 |
+
size_t *workspaceSizeInBytes);
|
| 685 |
+
|
| 686 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 687 |
+
cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, cudnnFusedOpsVariantParamPack_t varPack);
|
| 688 |
+
|
| 689 |
+
#if defined(__cplusplus)
|
| 690 |
+
}
|
| 691 |
+
#endif
|
| 692 |
+
|
| 693 |
+
#endif /* CUDNN_CNN_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_graph.h
ADDED
|
@@ -0,0 +1,992 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_graph : cuDNN's basic definitions operations.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_GRAPH_H_)
|
| 55 |
+
#define CUDNN_GRAPH_H_
|
| 56 |
+
|
| 57 |
+
#include <cuda_runtime_api.h>
|
| 58 |
+
#include <library_types.h>
|
| 59 |
+
|
| 60 |
+
#include <stdint.h>
|
| 61 |
+
|
| 62 |
+
#include "cudnn_version.h"
|
| 63 |
+
|
| 64 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 65 |
+
#define CUDNN_GRAPH_MAJOR 9
|
| 66 |
+
#define CUDNN_GRAPH_MINOR 10
|
| 67 |
+
#define CUDNN_GRAPH_PATCH 2
|
| 68 |
+
|
| 69 |
+
#if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL)
|
| 70 |
+
#error Version mismatch in cuDNN GRAPH!!!
|
| 71 |
+
#endif
|
| 72 |
+
|
| 73 |
+
#ifndef CUDNNWINAPI
|
| 74 |
+
#ifdef _WIN32
|
| 75 |
+
#define CUDNNWINAPI __stdcall
|
| 76 |
+
#else
|
| 77 |
+
#define CUDNNWINAPI
|
| 78 |
+
#endif
|
| 79 |
+
#endif
|
| 80 |
+
|
| 81 |
+
/* Warnings for deprecated API-s are enabled using the CUDNN_WARN_DEPRECATED macro */
|
| 82 |
+
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
|
| 83 |
+
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
|
| 84 |
+
#define CUDNN_DEPRECATED __attribute__((deprecated))
|
| 85 |
+
#define CUDNN_DEPRECATED_ENUM __attribute__((deprecated))
|
| 86 |
+
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
|
| 87 |
+
/* Microsoft Visual C++ */
|
| 88 |
+
#define CUDNN_DEPRECATED __declspec(deprecated)
|
| 89 |
+
#define CUDNN_DEPRECATED_ENUM __declspec(deprecated)
|
| 90 |
+
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
|
| 91 |
+
/* C++14 compilers */
|
| 92 |
+
#define CUDNN_DEPRECATED [[deprecated]]
|
| 93 |
+
#define CUDNN_DEPRECATED_ENUM [[deprecated]]
|
| 94 |
+
#else
|
| 95 |
+
/* No support for the deprecated attribute */
|
| 96 |
+
#define CUDNN_DEPRECATED
|
| 97 |
+
#define CUDNN_DEPRECATED_ENUM
|
| 98 |
+
#endif
|
| 99 |
+
|
| 100 |
+
#if defined(__cplusplus)
|
| 101 |
+
extern "C" {
|
| 102 |
+
#endif
|
| 103 |
+
|
| 104 |
+
struct cudnnContext;
|
| 105 |
+
typedef struct cudnnContext *cudnnHandle_t;
|
| 106 |
+
|
| 107 |
+
size_t CUDNNWINAPI
|
| 108 |
+
cudnnGetVersion(void);
|
| 109 |
+
|
| 110 |
+
size_t CUDNNWINAPI
|
| 111 |
+
cudnnGetMaxDeviceVersion(void);
|
| 112 |
+
|
| 113 |
+
/* Returns CUDA Runtime version statically linked against cudnn */
|
| 114 |
+
size_t CUDNNWINAPI
|
| 115 |
+
cudnnGetCudartVersion(void);
|
| 116 |
+
|
| 117 |
+
/*
|
| 118 |
+
* CUDNN return codes
|
| 119 |
+
*/
|
| 120 |
+
typedef enum {
|
| 121 |
+
CUDNN_STATUS_SUCCESS = 0,
|
| 122 |
+
|
| 123 |
+
/* Uncategorized errors */
|
| 124 |
+
CUDNN_STATUS_NOT_INITIALIZED = 1001,
|
| 125 |
+
CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH = 1002,
|
| 126 |
+
CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH = 1003,
|
| 127 |
+
CUDNN_STATUS_DEPRECATED = 1004,
|
| 128 |
+
CUDNN_STATUS_LICENSE_ERROR = 1005,
|
| 129 |
+
CUDNN_STATUS_RUNTIME_IN_PROGRESS = 1006,
|
| 130 |
+
CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 1007,
|
| 131 |
+
CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED = 1008,
|
| 132 |
+
|
| 133 |
+
CUDNN_STATUS_BAD_PARAM = 2000,
|
| 134 |
+
CUDNN_STATUS_BAD_PARAM_NULL_POINTER = 2002,
|
| 135 |
+
CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER = 2003,
|
| 136 |
+
CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED = 2004,
|
| 137 |
+
CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND = 2005,
|
| 138 |
+
CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT = 2006,
|
| 139 |
+
CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH = 2007,
|
| 140 |
+
CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008,
|
| 141 |
+
CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009,
|
| 142 |
+
CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010,
|
| 143 |
+
CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011,
|
| 144 |
+
CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE = 2012,
|
| 145 |
+
|
| 146 |
+
CUDNN_STATUS_NOT_SUPPORTED = 3000,
|
| 147 |
+
CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001,
|
| 148 |
+
CUDNN_STATUS_NOT_SUPPORTED_SHAPE = 3002,
|
| 149 |
+
CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE = 3003,
|
| 150 |
+
CUDNN_STATUS_NOT_SUPPORTED_LAYOUT = 3004,
|
| 151 |
+
CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER = 3005,
|
| 152 |
+
CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART = 3006,
|
| 153 |
+
CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH = 3007,
|
| 154 |
+
CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING = 3008,
|
| 155 |
+
CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE = 3009,
|
| 156 |
+
CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010,
|
| 157 |
+
CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011,
|
| 158 |
+
CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012,
|
| 159 |
+
CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013,
|
| 160 |
+
|
| 161 |
+
CUDNN_STATUS_INTERNAL_ERROR = 4000,
|
| 162 |
+
CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001,
|
| 163 |
+
CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE = 4002,
|
| 164 |
+
CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED = 4003,
|
| 165 |
+
CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED = 4004,
|
| 166 |
+
CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM = 4005,
|
| 167 |
+
CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED = 4006,
|
| 168 |
+
|
| 169 |
+
CUDNN_STATUS_EXECUTION_FAILED = 5000,
|
| 170 |
+
CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER = 5001,
|
| 171 |
+
CUDNN_STATUS_EXECUTION_FAILED_CUBLAS = 5002,
|
| 172 |
+
CUDNN_STATUS_EXECUTION_FAILED_CUDART = 5003,
|
| 173 |
+
CUDNN_STATUS_EXECUTION_FAILED_CURAND = 5004,
|
| 174 |
+
|
| 175 |
+
CUDNN_STATUS_ALLOC_FAILED CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED,
|
| 176 |
+
CUDNN_STATUS_INVALID_VALUE CUDNN_DEPRECATED_ENUM = 2001 /* please transition to CUDNN_STATUS_BAD_PARAM instead */,
|
| 177 |
+
CUDNN_STATUS_ARCH_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH,
|
| 178 |
+
CUDNN_STATUS_MAPPING_ERROR CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED,
|
| 179 |
+
CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING CUDNN_DEPRECATED_ENUM =
|
| 180 |
+
CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING,
|
| 181 |
+
CUDNN_STATUS_VERSION_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH,
|
| 182 |
+
} cudnnStatus_t;
|
| 183 |
+
|
| 184 |
+
#define CUDNN_STATUS_FULL_ERROR_CODE(category, specific_err) ((cudnnStatus_t)(0 + (category) + (specific_err)))
|
| 185 |
+
#define CUDNN_STATUS_CATEGORY(full_error_code) ((full_error_code) / 1000 * 1000)
|
| 186 |
+
#define CUDNN_STATUS_SPECIFIC_ERROR(full_error_code) ((full_error_code) % 1000)
|
| 187 |
+
|
| 188 |
+
/* human-readable error messages */
|
| 189 |
+
const char *CUDNNWINAPI
|
| 190 |
+
cudnnGetErrorString(cudnnStatus_t status);
|
| 191 |
+
|
| 192 |
+
void CUDNNWINAPI
|
| 193 |
+
cudnnGetLastErrorString(char *message, size_t max_size);
|
| 194 |
+
|
| 195 |
+
/* Forward definition in this version only */
|
| 196 |
+
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t CUDNN_DEPRECATED;
|
| 197 |
+
|
| 198 |
+
typedef enum {
|
| 199 |
+
CUDNN_ERRQUERY_RAWCODE = 0,
|
| 200 |
+
CUDNN_ERRQUERY_NONBLOCKING = 1,
|
| 201 |
+
CUDNN_ERRQUERY_BLOCKING = 2,
|
| 202 |
+
} cudnnErrQueryMode_t;
|
| 203 |
+
|
| 204 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 205 |
+
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);
|
| 206 |
+
|
| 207 |
+
cudnnStatus_t CUDNNWINAPI
|
| 208 |
+
cudnnGetProperty(libraryPropertyType type, int *value);
|
| 209 |
+
|
| 210 |
+
cudnnStatus_t CUDNNWINAPI
|
| 211 |
+
cudnnCreate(cudnnHandle_t *handle);
|
| 212 |
+
cudnnStatus_t CUDNNWINAPI
|
| 213 |
+
cudnnDestroy(cudnnHandle_t handle);
|
| 214 |
+
cudnnStatus_t CUDNNWINAPI
|
| 215 |
+
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
|
| 216 |
+
cudnnStatus_t CUDNNWINAPI
|
| 217 |
+
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
|
| 218 |
+
/*
|
| 219 |
+
* CUDNN data type
|
| 220 |
+
*/
|
| 221 |
+
typedef enum {
|
| 222 |
+
CUDNN_DATA_FLOAT = 0,
|
| 223 |
+
CUDNN_DATA_DOUBLE = 1,
|
| 224 |
+
CUDNN_DATA_HALF = 2,
|
| 225 |
+
CUDNN_DATA_INT8 = 3,
|
| 226 |
+
CUDNN_DATA_INT32 = 4,
|
| 227 |
+
CUDNN_DATA_INT8x4 CUDNN_DEPRECATED_ENUM = 5,
|
| 228 |
+
CUDNN_DATA_UINT8 = 6,
|
| 229 |
+
CUDNN_DATA_UINT8x4 CUDNN_DEPRECATED_ENUM = 7,
|
| 230 |
+
CUDNN_DATA_INT8x32 CUDNN_DEPRECATED_ENUM = 8,
|
| 231 |
+
CUDNN_DATA_BFLOAT16 = 9,
|
| 232 |
+
CUDNN_DATA_INT64 = 10,
|
| 233 |
+
CUDNN_DATA_BOOLEAN = 11,
|
| 234 |
+
CUDNN_DATA_FP8_E4M3 = 12,
|
| 235 |
+
CUDNN_DATA_FP8_E5M2 = 13,
|
| 236 |
+
CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14,
|
| 237 |
+
CUDNN_DATA_FP8_E8M0 = 15,
|
| 238 |
+
CUDNN_DATA_FP4_E2M1 = 16,
|
| 239 |
+
} cudnnDataType_t;
|
| 240 |
+
|
| 241 |
+
/*
|
| 242 |
+
* CUDNN math type
|
| 243 |
+
*/
|
| 244 |
+
typedef enum {
|
| 245 |
+
CUDNN_DEFAULT_MATH = 0,
|
| 246 |
+
CUDNN_TENSOR_OP_MATH = 1,
|
| 247 |
+
CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
|
| 248 |
+
CUDNN_FMA_MATH = 3,
|
| 249 |
+
} cudnnMathType_t;
|
| 250 |
+
|
| 251 |
+
/*
|
| 252 |
+
* CUDNN propagate Nan
|
| 253 |
+
*/
|
| 254 |
+
typedef enum {
|
| 255 |
+
CUDNN_NOT_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 0,
|
| 256 |
+
CUDNN_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 1,
|
| 257 |
+
} cudnnNanPropagation_t;
|
| 258 |
+
|
| 259 |
+
/*
|
| 260 |
+
* Behavior for OOB samples. OOB samples are samples where L+R > T is encountered during the gradient calculation. If
|
| 261 |
+
* gradMode is set to CUDNN_CTC_SKIP_OOB_GRADIENTS, then the CTC loss function does not write to the gradient buffer for
|
| 262 |
+
* that sample. Instead, the current values, even not finite, are retained. If gradMode is set to
|
| 263 |
+
* CUDNN_CTC_ZERO_OOB_GRADIENTS, then the gradient for that sample is set to zero. This guarantees a finite gradient.
|
| 264 |
+
*/
|
| 265 |
+
typedef enum {
|
| 266 |
+
CUDNN_CTC_ZERO_OOB_GRADIENTS = 0,
|
| 267 |
+
CUDNN_CTC_SKIP_OOB_GRADIENTS = 1,
|
| 268 |
+
} cudnnCTCGradMode_t;
|
| 269 |
+
|
| 270 |
+
typedef enum {
|
| 271 |
+
CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
|
| 272 |
+
CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 )*/
|
| 273 |
+
CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is vector of element of C, vector length in data type */
|
| 274 |
+
} cudnnTensorFormat_t;
|
| 275 |
+
|
| 276 |
+
/*
|
| 277 |
+
* CUDNN ReduceTensor op type
|
| 278 |
+
*/
|
| 279 |
+
typedef enum {
|
| 280 |
+
CUDNN_REDUCE_TENSOR_ADD = 0,
|
| 281 |
+
CUDNN_REDUCE_TENSOR_MUL = 1,
|
| 282 |
+
CUDNN_REDUCE_TENSOR_MIN = 2,
|
| 283 |
+
CUDNN_REDUCE_TENSOR_MAX = 3,
|
| 284 |
+
CUDNN_REDUCE_TENSOR_AMAX = 4,
|
| 285 |
+
CUDNN_REDUCE_TENSOR_AVG = 5,
|
| 286 |
+
CUDNN_REDUCE_TENSOR_NORM1 = 6,
|
| 287 |
+
CUDNN_REDUCE_TENSOR_NORM2 = 7,
|
| 288 |
+
CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
|
| 289 |
+
} cudnnReduceTensorOp_t;
|
| 290 |
+
|
| 291 |
+
/*
|
| 292 |
+
* activation mode
|
| 293 |
+
*/
|
| 294 |
+
typedef enum {
|
| 295 |
+
CUDNN_ACTIVATION_SIGMOID = 0,
|
| 296 |
+
CUDNN_ACTIVATION_RELU = 1,
|
| 297 |
+
CUDNN_ACTIVATION_TANH = 2,
|
| 298 |
+
CUDNN_ACTIVATION_CLIPPED_RELU = 3,
|
| 299 |
+
CUDNN_ACTIVATION_ELU = 4,
|
| 300 |
+
CUDNN_ACTIVATION_IDENTITY = 5,
|
| 301 |
+
CUDNN_ACTIVATION_SWISH = 6
|
| 302 |
+
} cudnnActivationMode_t CUDNN_DEPRECATED;
|
| 303 |
+
|
| 304 |
+
typedef enum {
|
| 305 |
+
CUDNN_SEV_FATAL = 0,
|
| 306 |
+
CUDNN_SEV_ERROR = 1,
|
| 307 |
+
CUDNN_SEV_WARNING = 2,
|
| 308 |
+
CUDNN_SEV_INFO = 3,
|
| 309 |
+
} cudnnSeverity_t;
|
| 310 |
+
|
| 311 |
+
/* Message masks to be used with cudnnSetCallback() */
|
| 312 |
+
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
|
| 313 |
+
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
|
| 314 |
+
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)
|
| 315 |
+
|
| 316 |
+
/* struct containing useful informaiton for each API call */
|
| 317 |
+
typedef struct cudnnDebugStruct {
|
| 318 |
+
unsigned cudnn_version;
|
| 319 |
+
cudnnStatus_t cudnnStatus;
|
| 320 |
+
unsigned time_sec; /* epoch time in seconds */
|
| 321 |
+
unsigned time_usec; /* microseconds part of epoch time */
|
| 322 |
+
unsigned time_delta; /* time since start in seconds */
|
| 323 |
+
cudnnHandle_t handle; /* cudnn handle */
|
| 324 |
+
cudaStream_t stream; /* cuda stream ID */
|
| 325 |
+
unsigned long long pid; /* process ID */
|
| 326 |
+
unsigned long long tid; /* thread ID */
|
| 327 |
+
int cudaDeviceId; /* CUDA device ID */
|
| 328 |
+
int reserved[15]; /* reserved for future use */
|
| 329 |
+
} cudnnDebug_t;
|
| 330 |
+
|
| 331 |
+
typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);
|
| 332 |
+
|
| 333 |
+
cudnnStatus_t CUDNNWINAPI
|
| 334 |
+
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);
|
| 335 |
+
|
| 336 |
+
cudnnStatus_t CUDNNWINAPI
|
| 337 |
+
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);
|
| 338 |
+
|
| 339 |
+
/*
|
| 340 |
+
* \brief Cross-library version checker.
|
| 341 |
+
* This function is implemented differently in each sub-library. Each sublib
|
| 342 |
+
* checks whether its own version matches that of its dependencies.
|
| 343 |
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
| 344 |
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
| 345 |
+
*/
|
| 346 |
+
cudnnStatus_t CUDNNWINAPI
|
| 347 |
+
cudnnGraphVersionCheck(void);
|
| 348 |
+
|
| 349 |
+
/* Maximum supported number of tensor dimensions */
|
| 350 |
+
#define CUDNN_DIM_MAX 8
|
| 351 |
+
|
| 352 |
+
/*
|
| 353 |
+
* convolution mode
|
| 354 |
+
*/
|
| 355 |
+
typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
|
| 356 |
+
|
| 357 |
+
/*
|
| 358 |
+
* CUDNN Reorder
|
| 359 |
+
*/
|
| 360 |
+
typedef enum {
|
| 361 |
+
CUDNN_DEFAULT_REORDER = 0,
|
| 362 |
+
CUDNN_NO_REORDER = 1,
|
| 363 |
+
} cudnnReorderType_t CUDNN_DEPRECATED;
|
| 364 |
+
|
| 365 |
+
typedef void *cudnnBackendDescriptor_t;
|
| 366 |
+
|
| 367 |
+
typedef struct cudnnFractionStruct {
|
| 368 |
+
int64_t numerator;
|
| 369 |
+
int64_t denominator;
|
| 370 |
+
} cudnnFraction_t;
|
| 371 |
+
|
| 372 |
+
typedef enum {
|
| 373 |
+
CUDNN_POINTWISE_ADD = 0,
|
| 374 |
+
CUDNN_POINTWISE_ADD_SQUARE = 5,
|
| 375 |
+
CUDNN_POINTWISE_DIV = 6,
|
| 376 |
+
CUDNN_POINTWISE_MAX = 3,
|
| 377 |
+
CUDNN_POINTWISE_MIN = 2,
|
| 378 |
+
CUDNN_POINTWISE_MOD = 7,
|
| 379 |
+
CUDNN_POINTWISE_MUL = 1,
|
| 380 |
+
CUDNN_POINTWISE_POW = 8,
|
| 381 |
+
CUDNN_POINTWISE_SUB = 9,
|
| 382 |
+
|
| 383 |
+
CUDNN_POINTWISE_ABS = 10,
|
| 384 |
+
CUDNN_POINTWISE_CEIL = 11,
|
| 385 |
+
CUDNN_POINTWISE_COS = 12,
|
| 386 |
+
CUDNN_POINTWISE_EXP = 13,
|
| 387 |
+
CUDNN_POINTWISE_FLOOR = 14,
|
| 388 |
+
CUDNN_POINTWISE_LOG = 15,
|
| 389 |
+
CUDNN_POINTWISE_NEG = 16,
|
| 390 |
+
CUDNN_POINTWISE_RSQRT = 17,
|
| 391 |
+
CUDNN_POINTWISE_SIN = 18,
|
| 392 |
+
CUDNN_POINTWISE_SQRT = 4,
|
| 393 |
+
CUDNN_POINTWISE_TAN = 19,
|
| 394 |
+
CUDNN_POINTWISE_ERF = 20,
|
| 395 |
+
CUDNN_POINTWISE_IDENTITY = 21,
|
| 396 |
+
CUDNN_POINTWISE_RECIPROCAL = 22,
|
| 397 |
+
CUDNN_POINTWISE_ATAN2 = 23,
|
| 398 |
+
|
| 399 |
+
CUDNN_POINTWISE_RELU_FWD = 100,
|
| 400 |
+
CUDNN_POINTWISE_TANH_FWD = 101,
|
| 401 |
+
CUDNN_POINTWISE_SIGMOID_FWD = 102,
|
| 402 |
+
CUDNN_POINTWISE_ELU_FWD = 103,
|
| 403 |
+
CUDNN_POINTWISE_GELU_FWD = 104,
|
| 404 |
+
CUDNN_POINTWISE_SOFTPLUS_FWD = 105,
|
| 405 |
+
CUDNN_POINTWISE_SWISH_FWD = 106,
|
| 406 |
+
CUDNN_POINTWISE_GELU_APPROX_TANH_FWD = 107,
|
| 407 |
+
|
| 408 |
+
CUDNN_POINTWISE_RELU_BWD = 200,
|
| 409 |
+
CUDNN_POINTWISE_TANH_BWD = 201,
|
| 410 |
+
CUDNN_POINTWISE_SIGMOID_BWD = 202,
|
| 411 |
+
CUDNN_POINTWISE_ELU_BWD = 203,
|
| 412 |
+
CUDNN_POINTWISE_GELU_BWD = 204,
|
| 413 |
+
CUDNN_POINTWISE_SOFTPLUS_BWD = 205,
|
| 414 |
+
CUDNN_POINTWISE_SWISH_BWD = 206,
|
| 415 |
+
CUDNN_POINTWISE_GELU_APPROX_TANH_BWD = 207,
|
| 416 |
+
|
| 417 |
+
CUDNN_POINTWISE_CMP_EQ = 300,
|
| 418 |
+
CUDNN_POINTWISE_CMP_NEQ = 301,
|
| 419 |
+
CUDNN_POINTWISE_CMP_GT = 302,
|
| 420 |
+
CUDNN_POINTWISE_CMP_GE = 303,
|
| 421 |
+
CUDNN_POINTWISE_CMP_LT = 304,
|
| 422 |
+
CUDNN_POINTWISE_CMP_LE = 305,
|
| 423 |
+
|
| 424 |
+
CUDNN_POINTWISE_LOGICAL_AND = 400,
|
| 425 |
+
CUDNN_POINTWISE_LOGICAL_OR = 401,
|
| 426 |
+
CUDNN_POINTWISE_LOGICAL_NOT = 402,
|
| 427 |
+
|
| 428 |
+
CUDNN_POINTWISE_GEN_INDEX = 501,
|
| 429 |
+
|
| 430 |
+
CUDNN_POINTWISE_BINARY_SELECT = 601,
|
| 431 |
+
} cudnnPointwiseMode_t;
|
| 432 |
+
|
| 433 |
+
typedef enum {
|
| 434 |
+
CUDNN_RESAMPLE_NEAREST = 0,
|
| 435 |
+
CUDNN_RESAMPLE_BILINEAR = 1,
|
| 436 |
+
CUDNN_RESAMPLE_AVGPOOL = 2,
|
| 437 |
+
CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING = 2,
|
| 438 |
+
CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING = 4,
|
| 439 |
+
CUDNN_RESAMPLE_MAXPOOL = 3,
|
| 440 |
+
} cudnnResampleMode_t;
|
| 441 |
+
|
| 442 |
+
typedef enum {
|
| 443 |
+
CUDNN_SIGNAL_SET = 0,
|
| 444 |
+
CUDNN_SIGNAL_WAIT = 1,
|
| 445 |
+
} cudnnSignalMode_t;
|
| 446 |
+
|
| 447 |
+
typedef enum {
|
| 448 |
+
CUDNN_GENSTATS_SUM_SQSUM = 0,
|
| 449 |
+
} cudnnGenStatsMode_t;
|
| 450 |
+
|
| 451 |
+
typedef enum {
|
| 452 |
+
CUDNN_BN_FINALIZE_STATISTICS_TRAINING = 0,
|
| 453 |
+
CUDNN_BN_FINALIZE_STATISTICS_INFERENCE = 1,
|
| 454 |
+
} cudnnBnFinalizeStatsMode_t;
|
| 455 |
+
|
| 456 |
+
typedef enum {
|
| 457 |
+
CUDNN_RNG_DISTRIBUTION_BERNOULLI = 0,
|
| 458 |
+
CUDNN_RNG_DISTRIBUTION_UNIFORM = 1,
|
| 459 |
+
CUDNN_RNG_DISTRIBUTION_NORMAL = 2,
|
| 460 |
+
} cudnnRngDistribution_t;
|
| 461 |
+
|
| 462 |
+
typedef enum {
|
| 463 |
+
CUDNN_ATTR_POINTWISE_MODE = 0,
|
| 464 |
+
CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
|
| 465 |
+
CUDNN_ATTR_POINTWISE_NAN_PROPAGATION CUDNN_DEPRECATED_ENUM = 2,
|
| 466 |
+
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
|
| 467 |
+
CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,
|
| 468 |
+
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE = 5,
|
| 469 |
+
CUDNN_ATTR_POINTWISE_ELU_ALPHA = 6,
|
| 470 |
+
CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA = 7,
|
| 471 |
+
CUDNN_ATTR_POINTWISE_SWISH_BETA = 8,
|
| 472 |
+
CUDNN_ATTR_POINTWISE_AXIS = 9,
|
| 473 |
+
|
| 474 |
+
CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
|
| 475 |
+
CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
|
| 476 |
+
CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
|
| 477 |
+
CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
|
| 478 |
+
CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
|
| 479 |
+
CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
|
| 480 |
+
CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,
|
| 481 |
+
|
| 482 |
+
CUDNN_ATTR_ENGINEHEUR_MODE = 200,
|
| 483 |
+
CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
|
| 484 |
+
CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,
|
| 485 |
+
CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET = 203,
|
| 486 |
+
CUDNN_ATTR_ENGINEHEUR_DEVICEPROP = 204,
|
| 487 |
+
|
| 488 |
+
CUDNN_ATTR_ENGINECFG_ENGINE = 300,
|
| 489 |
+
CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
|
| 490 |
+
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,
|
| 491 |
+
CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE = 303,
|
| 492 |
+
CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED = 304,
|
| 493 |
+
|
| 494 |
+
CUDNN_ATTR_EXECUTION_PLAN_HANDLE CUDNN_DEPRECATED_ENUM = 400,
|
| 495 |
+
CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
|
| 496 |
+
CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
|
| 497 |
+
CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
|
| 498 |
+
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
|
| 499 |
+
CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
|
| 500 |
+
CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406,
|
| 501 |
+
CUDNN_ATTR_EXECUTION_PLAN_DEVICEPROP = 407,
|
| 502 |
+
|
| 503 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
|
| 504 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
|
| 505 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
|
| 506 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,
|
| 507 |
+
|
| 508 |
+
CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
|
| 509 |
+
CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,
|
| 510 |
+
|
| 511 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
|
| 512 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
|
| 513 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
|
| 514 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
|
| 515 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
|
| 516 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
|
| 517 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
|
| 518 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
|
| 519 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
|
| 520 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
|
| 521 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
|
| 522 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
|
| 523 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
|
| 524 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
|
| 525 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
|
| 526 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
|
| 527 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
|
| 528 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,
|
| 529 |
+
|
| 530 |
+
CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
|
| 531 |
+
CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
|
| 532 |
+
CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
|
| 533 |
+
CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
|
| 534 |
+
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
|
| 535 |
+
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,
|
| 536 |
+
CUDNN_ATTR_OPERATION_POINTWISE_DXDESC = 756,
|
| 537 |
+
CUDNN_ATTR_OPERATION_POINTWISE_DYDESC = 757,
|
| 538 |
+
CUDNN_ATTR_OPERATION_POINTWISE_TDESC = 758,
|
| 539 |
+
|
| 540 |
+
CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
|
| 541 |
+
CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
|
| 542 |
+
CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
|
| 543 |
+
CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
|
| 544 |
+
CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,
|
| 545 |
+
|
| 546 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE = 780,
|
| 547 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC = 781,
|
| 548 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC = 782,
|
| 549 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC = 783,
|
| 550 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC = 784,
|
| 551 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC = 785,
|
| 552 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC = 786,
|
| 553 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC = 787,
|
| 554 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC = 788,
|
| 555 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC = 789,
|
| 556 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC = 790,
|
| 557 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC = 791,
|
| 558 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC = 792,
|
| 559 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC = 793,
|
| 560 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC = 794,
|
| 561 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC = 795,
|
| 562 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC = 796,
|
| 563 |
+
|
| 564 |
+
CUDNN_ATTR_OPERATIONGRAPH_HANDLE CUDNN_DEPRECATED_ENUM = 800,
|
| 565 |
+
CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
|
| 566 |
+
CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
|
| 567 |
+
CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803,
|
| 568 |
+
CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY = 804,
|
| 569 |
+
|
| 570 |
+
CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
|
| 571 |
+
CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
|
| 572 |
+
CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
|
| 573 |
+
CUDNN_ATTR_TENSOR_STRIDES = 903,
|
| 574 |
+
CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
|
| 575 |
+
CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
|
| 576 |
+
CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
|
| 577 |
+
CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,
|
| 578 |
+
CUDNN_ATTR_TENSOR_IS_BY_VALUE = 908,
|
| 579 |
+
CUDNN_ATTR_TENSOR_REORDERING_MODE = 909,
|
| 580 |
+
CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC = 913,
|
| 581 |
+
|
| 582 |
+
CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
|
| 583 |
+
CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
|
| 584 |
+
CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
|
| 585 |
+
CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,
|
| 586 |
+
|
| 587 |
+
CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
|
| 588 |
+
CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,
|
| 589 |
+
|
| 590 |
+
CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
|
| 591 |
+
CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
|
| 592 |
+
CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
|
| 593 |
+
CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,
|
| 594 |
+
|
| 595 |
+
CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
|
| 596 |
+
CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
|
| 597 |
+
CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
|
| 598 |
+
CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
|
| 599 |
+
CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
|
| 600 |
+
CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305,
|
| 601 |
+
CUDNN_ATTR_ENGINE_SM_COUNT_TARGET = 1306,
|
| 602 |
+
CUDNN_ATTR_ENGINE_DEVICEPROP = 1307,
|
| 603 |
+
|
| 604 |
+
CUDNN_ATTR_MATMUL_COMP_TYPE = 1500,
|
| 605 |
+
CUDNN_ATTR_MATMUL_PADDING_VALUE = 1503,
|
| 606 |
+
|
| 607 |
+
CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520,
|
| 608 |
+
CUDNN_ATTR_OPERATION_MATMUL_BDESC = 1521,
|
| 609 |
+
CUDNN_ATTR_OPERATION_MATMUL_CDESC = 1522,
|
| 610 |
+
CUDNN_ATTR_OPERATION_MATMUL_DESC = 1523,
|
| 611 |
+
CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT CUDNN_DEPRECATED_ENUM = 1524,
|
| 612 |
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC = 1525,
|
| 613 |
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC = 1526,
|
| 614 |
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC = 1527,
|
| 615 |
+
|
| 616 |
+
CUDNN_ATTR_REDUCTION_OPERATOR = 1600,
|
| 617 |
+
CUDNN_ATTR_REDUCTION_COMP_TYPE = 1601,
|
| 618 |
+
|
| 619 |
+
CUDNN_ATTR_OPERATION_REDUCTION_XDESC = 1610,
|
| 620 |
+
CUDNN_ATTR_OPERATION_REDUCTION_YDESC = 1611,
|
| 621 |
+
CUDNN_ATTR_OPERATION_REDUCTION_DESC = 1612,
|
| 622 |
+
|
| 623 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC = 1620,
|
| 624 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC = 1621,
|
| 625 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC = 1622,
|
| 626 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC = 1623,
|
| 627 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC = 1624,
|
| 628 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC = 1625,
|
| 629 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC = 1626,
|
| 630 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC = 1627,
|
| 631 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC = 1628,
|
| 632 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC = 1629,
|
| 633 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS = 1630,
|
| 634 |
+
|
| 635 |
+
CUDNN_ATTR_RESAMPLE_MODE = 1700,
|
| 636 |
+
CUDNN_ATTR_RESAMPLE_COMP_TYPE = 1701,
|
| 637 |
+
CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS = 1702,
|
| 638 |
+
CUDNN_ATTR_RESAMPLE_POST_PADDINGS = 1703,
|
| 639 |
+
CUDNN_ATTR_RESAMPLE_PRE_PADDINGS = 1704,
|
| 640 |
+
CUDNN_ATTR_RESAMPLE_STRIDES = 1705,
|
| 641 |
+
CUDNN_ATTR_RESAMPLE_WINDOW_DIMS = 1706,
|
| 642 |
+
CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION = 1707,
|
| 643 |
+
CUDNN_ATTR_RESAMPLE_PADDING_MODE = 1708,
|
| 644 |
+
|
| 645 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC = 1710,
|
| 646 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC = 1711,
|
| 647 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC = 1712,
|
| 648 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA CUDNN_DEPRECATED_ENUM = 1713,
|
| 649 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA CUDNN_DEPRECATED_ENUM = 1714,
|
| 650 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC = 1716,
|
| 651 |
+
|
| 652 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC = 1720,
|
| 653 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC = 1721,
|
| 654 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC = 1722,
|
| 655 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA CUDNN_DEPRECATED_ENUM = 1723,
|
| 656 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA CUDNN_DEPRECATED_ENUM = 1724,
|
| 657 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC = 1725,
|
| 658 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC = 1726,
|
| 659 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC = 1727,
|
| 660 |
+
|
| 661 |
+
CUDNN_ATTR_OPERATION_CONCAT_AXIS = 1800,
|
| 662 |
+
CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS = 1801,
|
| 663 |
+
CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX = 1802,
|
| 664 |
+
CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC = 1803,
|
| 665 |
+
|
| 666 |
+
CUDNN_ATTR_OPERATION_SIGNAL_MODE = 1900,
|
| 667 |
+
CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC = 1901,
|
| 668 |
+
CUDNN_ATTR_OPERATION_SIGNAL_VALUE = 1902,
|
| 669 |
+
CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
|
| 670 |
+
CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
|
| 671 |
+
|
| 672 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950,
|
| 673 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951,
|
| 674 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952,
|
| 675 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953,
|
| 676 |
+
|
| 677 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
|
| 678 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
|
| 679 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
|
| 680 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC = 2003,
|
| 681 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC = 2004,
|
| 682 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC = 2005,
|
| 683 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC = 2006,
|
| 684 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC = 2007,
|
| 685 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC = 2008,
|
| 686 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC = 2009,
|
| 687 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC = 2010,
|
| 688 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC = 2011,
|
| 689 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC = 2012,
|
| 690 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_YDESC = 2013,
|
| 691 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS = 2014,
|
| 692 |
+
|
| 693 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_MODE = 2100,
|
| 694 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_XDESC = 2101,
|
| 695 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC = 2102,
|
| 696 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC = 2103,
|
| 697 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC = 2104,
|
| 698 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC = 2105,
|
| 699 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC = 2106,
|
| 700 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC = 2107,
|
| 701 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC = 2108,
|
| 702 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC = 2109,
|
| 703 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS = 2110,
|
| 704 |
+
|
| 705 |
+
CUDNN_ATTR_OPERATION_RESHAPE_XDESC = 2200,
|
| 706 |
+
CUDNN_ATTR_OPERATION_RESHAPE_YDESC = 2201,
|
| 707 |
+
|
| 708 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_XDESC = 2250,
|
| 709 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_YDESC = 2251,
|
| 710 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_LOWER_BANDWIDTH = 2252,
|
| 711 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_UPPER_BANDWIDTH = 2253,
|
| 712 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_AXIS = 2254,
|
| 713 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_PAD_VALUE = 2255,
|
| 714 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_KV_TOKEN_OFFSET_DESC = 2256,
|
| 715 |
+
|
| 716 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_XDESC = 2270,
|
| 717 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_YDESC = 2271,
|
| 718 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_LOWER_BANDWIDTH = 2272,
|
| 719 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_UPPER_BANDWIDTH = 2273,
|
| 720 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_AXIS = 2274,
|
| 721 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_PAD_VALUE = 2275,
|
| 722 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MAX_TOKEN_VALUE = 2276,
|
| 723 |
+
|
| 724 |
+
CUDNN_ATTR_RNG_DISTRIBUTION = 2300,
|
| 725 |
+
CUDNN_ATTR_RNG_NORMAL_DIST_MEAN = 2301,
|
| 726 |
+
CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION = 2302,
|
| 727 |
+
CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM = 2303,
|
| 728 |
+
CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM = 2304,
|
| 729 |
+
CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY = 2305,
|
| 730 |
+
|
| 731 |
+
CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
|
| 732 |
+
CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
|
| 733 |
+
CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
|
| 734 |
+
CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
|
| 735 |
+
|
| 736 |
+
CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400,
|
| 737 |
+
CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401,
|
| 738 |
+
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION = 2402,
|
| 739 |
+
|
| 740 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_XDESC = 2500,
|
| 741 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_YDESC = 2501,
|
| 742 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_SCALE_DESC = 2502,
|
| 743 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_MATH_PREC = 2503,
|
| 744 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_BLOCK_SIZE = 2504,
|
| 745 |
+
|
| 746 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_XDESC = 2600,
|
| 747 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_SCALE_DESC = 2601,
|
| 748 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_YDESC = 2602,
|
| 749 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_MATH_PREC = 2603,
|
| 750 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_BLOCK_SIZE = 2604,
|
| 751 |
+
|
| 752 |
+
CUDNN_ATTR_DEVICEPROP_DEVICE_ID = 2700,
|
| 753 |
+
CUDNN_ATTR_DEVICEPROP_HANDLE = 2701,
|
| 754 |
+
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION = 2702,
|
| 755 |
+
} cudnnBackendAttributeName_t;
|
| 756 |
+
|
| 757 |
+
typedef enum {
|
| 758 |
+
CUDNN_TYPE_HANDLE = 0,
|
| 759 |
+
CUDNN_TYPE_DATA_TYPE = 1,
|
| 760 |
+
CUDNN_TYPE_BOOLEAN = 2,
|
| 761 |
+
CUDNN_TYPE_INT64 = 3,
|
| 762 |
+
CUDNN_TYPE_FLOAT = 4,
|
| 763 |
+
CUDNN_TYPE_DOUBLE = 5,
|
| 764 |
+
CUDNN_TYPE_VOID_PTR = 6,
|
| 765 |
+
CUDNN_TYPE_CONVOLUTION_MODE = 7,
|
| 766 |
+
CUDNN_TYPE_HEUR_MODE = 8,
|
| 767 |
+
CUDNN_TYPE_KNOB_TYPE = 9,
|
| 768 |
+
CUDNN_TYPE_NAN_PROPOGATION CUDNN_DEPRECATED_ENUM = 10,
|
| 769 |
+
CUDNN_TYPE_NUMERICAL_NOTE = 11,
|
| 770 |
+
CUDNN_TYPE_LAYOUT_TYPE = 12,
|
| 771 |
+
CUDNN_TYPE_ATTRIB_NAME = 13,
|
| 772 |
+
CUDNN_TYPE_POINTWISE_MODE = 14,
|
| 773 |
+
CUDNN_TYPE_BACKEND_DESCRIPTOR = 15,
|
| 774 |
+
CUDNN_TYPE_GENSTATS_MODE = 16,
|
| 775 |
+
CUDNN_TYPE_BN_FINALIZE_STATS_MODE = 17,
|
| 776 |
+
CUDNN_TYPE_REDUCTION_OPERATOR_TYPE = 18,
|
| 777 |
+
CUDNN_TYPE_BEHAVIOR_NOTE = 19,
|
| 778 |
+
CUDNN_TYPE_TENSOR_REORDERING_MODE = 20,
|
| 779 |
+
CUDNN_TYPE_RESAMPLE_MODE = 21,
|
| 780 |
+
CUDNN_TYPE_PADDING_MODE = 22,
|
| 781 |
+
CUDNN_TYPE_INT32 = 23,
|
| 782 |
+
CUDNN_TYPE_CHAR = 24,
|
| 783 |
+
CUDNN_TYPE_SIGNAL_MODE = 25,
|
| 784 |
+
CUDNN_TYPE_FRACTION = 26,
|
| 785 |
+
CUDNN_TYPE_NORM_MODE = 27,
|
| 786 |
+
CUDNN_TYPE_NORM_FWD_PHASE = 28,
|
| 787 |
+
CUDNN_TYPE_RNG_DISTRIBUTION = 29,
|
| 788 |
+
} cudnnBackendAttributeType_t;
|
| 789 |
+
|
| 790 |
+
typedef enum {
|
| 791 |
+
CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0,
|
| 792 |
+
CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR = 1,
|
| 793 |
+
CUDNN_BACKEND_ENGINE_DESCRIPTOR = 2,
|
| 794 |
+
CUDNN_BACKEND_ENGINECFG_DESCRIPTOR = 3,
|
| 795 |
+
CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR = 4,
|
| 796 |
+
CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR = 5,
|
| 797 |
+
CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR = 6,
|
| 798 |
+
CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR = 7,
|
| 799 |
+
CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR = 8,
|
| 800 |
+
CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR = 9,
|
| 801 |
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR = 10,
|
| 802 |
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR = 11,
|
| 803 |
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR = 12,
|
| 804 |
+
CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR = 13,
|
| 805 |
+
CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR = 14,
|
| 806 |
+
CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR = 15,
|
| 807 |
+
CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR = 16,
|
| 808 |
+
CUDNN_BACKEND_TENSOR_DESCRIPTOR = 17,
|
| 809 |
+
CUDNN_BACKEND_MATMUL_DESCRIPTOR = 18,
|
| 810 |
+
CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR = 19,
|
| 811 |
+
CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR = 20,
|
| 812 |
+
CUDNN_BACKEND_REDUCTION_DESCRIPTOR = 21,
|
| 813 |
+
CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR = 22,
|
| 814 |
+
CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR = 23,
|
| 815 |
+
CUDNN_BACKEND_RESAMPLE_DESCRIPTOR = 24,
|
| 816 |
+
CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR = 25,
|
| 817 |
+
CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR = 26,
|
| 818 |
+
CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR = 27,
|
| 819 |
+
CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR = 28,
|
| 820 |
+
CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR = 29,
|
| 821 |
+
CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30,
|
| 822 |
+
CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31,
|
| 823 |
+
CUDNN_BACKEND_RNG_DESCRIPTOR = 32,
|
| 824 |
+
CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33,
|
| 825 |
+
CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34,
|
| 826 |
+
CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35,
|
| 827 |
+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_QUANTIZE_DESCRIPTOR = 36,
|
| 828 |
+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_DEQUANTIZE_DESCRIPTOR = 37,
|
| 829 |
+
CUDNN_BACKEND_DEVICEPROP_DESCRIPTOR = 38,
|
| 830 |
+
CUDNN_BACKEND_OPERATION_EXPAND_BAND_MATRIX_DESCRIPTOR = 39,
|
| 831 |
+
CUDNN_BACKEND_OPERATION_CONTRACT_BAND_MATRIX_DESCRIPTOR = 40,
|
| 832 |
+
} cudnnBackendDescriptorType_t;
|
| 833 |
+
|
| 834 |
+
typedef enum {
|
| 835 |
+
CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0,
|
| 836 |
+
CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS = 1,
|
| 837 |
+
CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION = 2,
|
| 838 |
+
CUDNN_NUMERICAL_NOTE_FFT = 3,
|
| 839 |
+
CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC = 4,
|
| 840 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD = 5,
|
| 841 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4 = 6,
|
| 842 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6 = 7,
|
| 843 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13 = 8,
|
| 844 |
+
CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP = 9,
|
| 845 |
+
CUDNN_NUMERICAL_NOTE_TYPE_COUNT = 10,
|
| 846 |
+
} cudnnBackendNumericalNote_t;
|
| 847 |
+
|
| 848 |
+
typedef enum {
|
| 849 |
+
CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
|
| 850 |
+
CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
|
| 851 |
+
CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
|
| 852 |
+
CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3,
|
| 853 |
+
CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4,
|
| 854 |
+
} cudnnBackendBehaviorNote_t;
|
| 855 |
+
|
| 856 |
+
typedef enum {
|
| 857 |
+
CUDNN_KNOB_TYPE_SPLIT_K CUDNN_DEPRECATED_ENUM = 0,
|
| 858 |
+
CUDNN_KNOB_TYPE_SWIZZLE = 1,
|
| 859 |
+
CUDNN_KNOB_TYPE_TILE_SIZE = 2,
|
| 860 |
+
CUDNN_KNOB_TYPE_USE_TEX CUDNN_DEPRECATED_ENUM = 3,
|
| 861 |
+
CUDNN_KNOB_TYPE_EDGE = 4,
|
| 862 |
+
CUDNN_KNOB_TYPE_KBLOCK CUDNN_DEPRECATED_ENUM = 5,
|
| 863 |
+
CUDNN_KNOB_TYPE_LDGA CUDNN_DEPRECATED_ENUM = 6,
|
| 864 |
+
CUDNN_KNOB_TYPE_LDGB CUDNN_DEPRECATED_ENUM = 7,
|
| 865 |
+
CUDNN_KNOB_TYPE_CHUNK_K CUDNN_DEPRECATED_ENUM = 8,
|
| 866 |
+
CUDNN_KNOB_TYPE_SPLIT_H CUDNN_DEPRECATED_ENUM = 9,
|
| 867 |
+
CUDNN_KNOB_TYPE_WINO_TILE CUDNN_DEPRECATED_ENUM = 10,
|
| 868 |
+
CUDNN_KNOB_TYPE_MULTIPLY = 11,
|
| 869 |
+
CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12,
|
| 870 |
+
CUDNN_KNOB_TYPE_TILEK = 13,
|
| 871 |
+
CUDNN_KNOB_TYPE_STAGES = 14,
|
| 872 |
+
CUDNN_KNOB_TYPE_REDUCTION_MODE = 15,
|
| 873 |
+
CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE CUDNN_DEPRECATED_ENUM = 16,
|
| 874 |
+
CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17,
|
| 875 |
+
CUDNN_KNOB_TYPE_IDX_MODE = 18,
|
| 876 |
+
CUDNN_KNOB_TYPE_SLICED CUDNN_DEPRECATED_ENUM = 19,
|
| 877 |
+
CUDNN_KNOB_TYPE_SPLIT_RS CUDNN_DEPRECATED_ENUM = 20,
|
| 878 |
+
CUDNN_KNOB_TYPE_SINGLEBUFFER CUDNN_DEPRECATED_ENUM = 21,
|
| 879 |
+
CUDNN_KNOB_TYPE_LDGC CUDNN_DEPRECATED_ENUM = 22,
|
| 880 |
+
CUDNN_KNOB_TYPE_SPECFILT = 23,
|
| 881 |
+
CUDNN_KNOB_TYPE_KERNEL_CFG = 24,
|
| 882 |
+
CUDNN_KNOB_TYPE_WORKSPACE = 25,
|
| 883 |
+
CUDNN_KNOB_TYPE_TILE_CGA CUDNN_DEPRECATED_ENUM = 26,
|
| 884 |
+
CUDNN_KNOB_TYPE_TILE_CGA_M = 27,
|
| 885 |
+
CUDNN_KNOB_TYPE_TILE_CGA_N = 28,
|
| 886 |
+
CUDNN_KNOB_TYPE_BLOCK_SIZE = 29,
|
| 887 |
+
CUDNN_KNOB_TYPE_OCCUPANCY = 30,
|
| 888 |
+
CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD = 31,
|
| 889 |
+
CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK CUDNN_DEPRECATED_ENUM = 32,
|
| 890 |
+
CUDNN_KNOB_TYPE_SPLIT_COLS = 33,
|
| 891 |
+
CUDNN_KNOB_TYPE_TILE_ROWS = 34,
|
| 892 |
+
CUDNN_KNOB_TYPE_TILE_COLS = 35,
|
| 893 |
+
CUDNN_KNOB_TYPE_LOAD_SIZE = 36,
|
| 894 |
+
CUDNN_KNOB_TYPE_CTA_COUNT = 37,
|
| 895 |
+
CUDNN_KNOB_TYPE_STREAM_K = 38,
|
| 896 |
+
CUDNN_KNOB_TYPE_SPLIT_P_SLC = 39,
|
| 897 |
+
CUDNN_KNOB_TYPE_TILE_M = 40,
|
| 898 |
+
CUDNN_KNOB_TYPE_TILE_N = 41,
|
| 899 |
+
CUDNN_KNOB_TYPE_WARP_SPEC_CFG = 42,
|
| 900 |
+
CUDNN_KNOB_TYPE_COUNTS = 43,
|
| 901 |
+
} cudnnBackendKnobType_t;
|
| 902 |
+
|
| 903 |
+
typedef enum {
|
| 904 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0,
|
| 905 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1,
|
| 906 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
|
| 907 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
|
| 908 |
+
CUDNN_LAYOUT_TYPE_COUNT = 4,
|
| 909 |
+
} cudnnBackendLayoutType_t;
|
| 910 |
+
|
| 911 |
+
typedef enum {
|
| 912 |
+
CUDNN_HEUR_MODE_INSTANT = 0,
|
| 913 |
+
CUDNN_HEUR_MODE_B = 1,
|
| 914 |
+
CUDNN_HEUR_MODE_FALLBACK = 2,
|
| 915 |
+
CUDNN_HEUR_MODE_A = 3,
|
| 916 |
+
CUDNN_HEUR_MODES_COUNT = 4,
|
| 917 |
+
} cudnnBackendHeurMode_t;
|
| 918 |
+
|
| 919 |
+
typedef enum {
|
| 920 |
+
CUDNN_TENSOR_REORDERING_NONE = 0,
|
| 921 |
+
CUDNN_TENSOR_REORDERING_INT8x32 = 1,
|
| 922 |
+
CUDNN_TENSOR_REORDERING_F16x16 = 2,
|
| 923 |
+
CUDNN_TENSOR_REORDERING_F8_128x4 = 3,
|
| 924 |
+
} cudnnBackendTensorReordering_t;
|
| 925 |
+
|
| 926 |
+
typedef enum {
|
| 927 |
+
CUDNN_ZERO_PAD = 0,
|
| 928 |
+
CUDNN_NEG_INF_PAD = 1,
|
| 929 |
+
CUDNN_EDGE_VAL_PAD = 2,
|
| 930 |
+
} cudnnPaddingMode_t;
|
| 931 |
+
|
| 932 |
+
typedef enum {
|
| 933 |
+
CUDNN_LAYER_NORM = 0,
|
| 934 |
+
CUDNN_INSTANCE_NORM = 1,
|
| 935 |
+
CUDNN_BATCH_NORM = 2,
|
| 936 |
+
CUDNN_GROUP_NORM = 3,
|
| 937 |
+
CUDNN_RMS_NORM = 4,
|
| 938 |
+
CUDNN_ADA_LAYER_NORM = 5,
|
| 939 |
+
} cudnnBackendNormMode_t;
|
| 940 |
+
|
| 941 |
+
typedef enum {
|
| 942 |
+
CUDNN_NORM_FWD_INFERENCE = 0,
|
| 943 |
+
CUDNN_NORM_FWD_TRAINING = 1,
|
| 944 |
+
} cudnnBackendNormFwdPhase_t;
|
| 945 |
+
|
| 946 |
+
cudnnStatus_t CUDNNWINAPI
|
| 947 |
+
cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t *descriptor);
|
| 948 |
+
|
| 949 |
+
cudnnStatus_t CUDNNWINAPI
|
| 950 |
+
cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor);
|
| 951 |
+
|
| 952 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 953 |
+
cudnnBackendInitialize(cudnnBackendDescriptor_t descriptor);
|
| 954 |
+
|
| 955 |
+
cudnnStatus_t CUDNNWINAPI
|
| 956 |
+
cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor);
|
| 957 |
+
|
| 958 |
+
cudnnStatus_t CUDNNWINAPI
|
| 959 |
+
cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor,
|
| 960 |
+
cudnnBackendAttributeName_t attributeName,
|
| 961 |
+
cudnnBackendAttributeType_t attributeType,
|
| 962 |
+
int64_t elementCount,
|
| 963 |
+
const void *arrayOfElements);
|
| 964 |
+
|
| 965 |
+
cudnnStatus_t CUDNNWINAPI
|
| 966 |
+
cudnnBackendGetAttribute(cudnnBackendDescriptor_t const descriptor,
|
| 967 |
+
cudnnBackendAttributeName_t attributeName,
|
| 968 |
+
cudnnBackendAttributeType_t attributeType,
|
| 969 |
+
int64_t requestedElementCount,
|
| 970 |
+
int64_t *elementCount,
|
| 971 |
+
void *arrayOfElements);
|
| 972 |
+
|
| 973 |
+
cudnnStatus_t CUDNNWINAPI
|
| 974 |
+
cudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
|
| 975 |
+
|
| 976 |
+
cudnnStatus_t CUDNNWINAPI
|
| 977 |
+
cudnnBackendPopulateCudaGraph(cudnnHandle_t handle,
|
| 978 |
+
cudnnBackendDescriptor_t executionPlan,
|
| 979 |
+
cudnnBackendDescriptor_t variantPack,
|
| 980 |
+
cudaGraph_t graph);
|
| 981 |
+
|
| 982 |
+
cudnnStatus_t CUDNNWINAPI
|
| 983 |
+
cudnnBackendUpdateCudaGraph(cudnnHandle_t handle,
|
| 984 |
+
cudnnBackendDescriptor_t executionPlan,
|
| 985 |
+
cudnnBackendDescriptor_t variantPack,
|
| 986 |
+
cudaGraph_t graph);
|
| 987 |
+
|
| 988 |
+
#if defined(__cplusplus)
|
| 989 |
+
}
|
| 990 |
+
#endif
|
| 991 |
+
|
| 992 |
+
#endif /* CUDNN_GRAPH_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_graph_v9.h
ADDED
|
@@ -0,0 +1,992 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_graph : cuDNN's basic definitions operations.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_GRAPH_H_)
|
| 55 |
+
#define CUDNN_GRAPH_H_
|
| 56 |
+
|
| 57 |
+
#include <cuda_runtime_api.h>
|
| 58 |
+
#include <library_types.h>
|
| 59 |
+
|
| 60 |
+
#include <stdint.h>
|
| 61 |
+
|
| 62 |
+
#include "cudnn_version.h"
|
| 63 |
+
|
| 64 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 65 |
+
#define CUDNN_GRAPH_MAJOR 9
|
| 66 |
+
#define CUDNN_GRAPH_MINOR 10
|
| 67 |
+
#define CUDNN_GRAPH_PATCH 2
|
| 68 |
+
|
| 69 |
+
#if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL)
|
| 70 |
+
#error Version mismatch in cuDNN GRAPH!!!
|
| 71 |
+
#endif
|
| 72 |
+
|
| 73 |
+
#ifndef CUDNNWINAPI
|
| 74 |
+
#ifdef _WIN32
|
| 75 |
+
#define CUDNNWINAPI __stdcall
|
| 76 |
+
#else
|
| 77 |
+
#define CUDNNWINAPI
|
| 78 |
+
#endif
|
| 79 |
+
#endif
|
| 80 |
+
|
| 81 |
+
/* Warnings for deprecated API-s are enabled using the CUDNN_WARN_DEPRECATED macro */
|
| 82 |
+
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
|
| 83 |
+
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
|
| 84 |
+
#define CUDNN_DEPRECATED __attribute__((deprecated))
|
| 85 |
+
#define CUDNN_DEPRECATED_ENUM __attribute__((deprecated))
|
| 86 |
+
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
|
| 87 |
+
/* Microsoft Visual C++ */
|
| 88 |
+
#define CUDNN_DEPRECATED __declspec(deprecated)
|
| 89 |
+
#define CUDNN_DEPRECATED_ENUM __declspec(deprecated)
|
| 90 |
+
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
|
| 91 |
+
/* C++14 compilers */
|
| 92 |
+
#define CUDNN_DEPRECATED [[deprecated]]
|
| 93 |
+
#define CUDNN_DEPRECATED_ENUM [[deprecated]]
|
| 94 |
+
#else
|
| 95 |
+
/* No support for the deprecated attribute */
|
| 96 |
+
#define CUDNN_DEPRECATED
|
| 97 |
+
#define CUDNN_DEPRECATED_ENUM
|
| 98 |
+
#endif
|
| 99 |
+
|
| 100 |
+
#if defined(__cplusplus)
|
| 101 |
+
extern "C" {
|
| 102 |
+
#endif
|
| 103 |
+
|
| 104 |
+
struct cudnnContext;
|
| 105 |
+
typedef struct cudnnContext *cudnnHandle_t;
|
| 106 |
+
|
| 107 |
+
size_t CUDNNWINAPI
|
| 108 |
+
cudnnGetVersion(void);
|
| 109 |
+
|
| 110 |
+
size_t CUDNNWINAPI
|
| 111 |
+
cudnnGetMaxDeviceVersion(void);
|
| 112 |
+
|
| 113 |
+
/* Returns CUDA Runtime version statically linked against cudnn */
|
| 114 |
+
size_t CUDNNWINAPI
|
| 115 |
+
cudnnGetCudartVersion(void);
|
| 116 |
+
|
| 117 |
+
/*
|
| 118 |
+
* CUDNN return codes
|
| 119 |
+
*/
|
| 120 |
+
typedef enum {
|
| 121 |
+
CUDNN_STATUS_SUCCESS = 0,
|
| 122 |
+
|
| 123 |
+
/* Uncategorized errors */
|
| 124 |
+
CUDNN_STATUS_NOT_INITIALIZED = 1001,
|
| 125 |
+
CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH = 1002,
|
| 126 |
+
CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH = 1003,
|
| 127 |
+
CUDNN_STATUS_DEPRECATED = 1004,
|
| 128 |
+
CUDNN_STATUS_LICENSE_ERROR = 1005,
|
| 129 |
+
CUDNN_STATUS_RUNTIME_IN_PROGRESS = 1006,
|
| 130 |
+
CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 1007,
|
| 131 |
+
CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED = 1008,
|
| 132 |
+
|
| 133 |
+
CUDNN_STATUS_BAD_PARAM = 2000,
|
| 134 |
+
CUDNN_STATUS_BAD_PARAM_NULL_POINTER = 2002,
|
| 135 |
+
CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER = 2003,
|
| 136 |
+
CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED = 2004,
|
| 137 |
+
CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND = 2005,
|
| 138 |
+
CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT = 2006,
|
| 139 |
+
CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH = 2007,
|
| 140 |
+
CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008,
|
| 141 |
+
CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009,
|
| 142 |
+
CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010,
|
| 143 |
+
CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011,
|
| 144 |
+
CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE = 2012,
|
| 145 |
+
|
| 146 |
+
CUDNN_STATUS_NOT_SUPPORTED = 3000,
|
| 147 |
+
CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001,
|
| 148 |
+
CUDNN_STATUS_NOT_SUPPORTED_SHAPE = 3002,
|
| 149 |
+
CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE = 3003,
|
| 150 |
+
CUDNN_STATUS_NOT_SUPPORTED_LAYOUT = 3004,
|
| 151 |
+
CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER = 3005,
|
| 152 |
+
CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART = 3006,
|
| 153 |
+
CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH = 3007,
|
| 154 |
+
CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING = 3008,
|
| 155 |
+
CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE = 3009,
|
| 156 |
+
CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010,
|
| 157 |
+
CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011,
|
| 158 |
+
CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012,
|
| 159 |
+
CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013,
|
| 160 |
+
|
| 161 |
+
CUDNN_STATUS_INTERNAL_ERROR = 4000,
|
| 162 |
+
CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001,
|
| 163 |
+
CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE = 4002,
|
| 164 |
+
CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED = 4003,
|
| 165 |
+
CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED = 4004,
|
| 166 |
+
CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM = 4005,
|
| 167 |
+
CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED = 4006,
|
| 168 |
+
|
| 169 |
+
CUDNN_STATUS_EXECUTION_FAILED = 5000,
|
| 170 |
+
CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER = 5001,
|
| 171 |
+
CUDNN_STATUS_EXECUTION_FAILED_CUBLAS = 5002,
|
| 172 |
+
CUDNN_STATUS_EXECUTION_FAILED_CUDART = 5003,
|
| 173 |
+
CUDNN_STATUS_EXECUTION_FAILED_CURAND = 5004,
|
| 174 |
+
|
| 175 |
+
CUDNN_STATUS_ALLOC_FAILED CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED,
|
| 176 |
+
CUDNN_STATUS_INVALID_VALUE CUDNN_DEPRECATED_ENUM = 2001 /* please transition to CUDNN_STATUS_BAD_PARAM instead */,
|
| 177 |
+
CUDNN_STATUS_ARCH_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH,
|
| 178 |
+
CUDNN_STATUS_MAPPING_ERROR CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED,
|
| 179 |
+
CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING CUDNN_DEPRECATED_ENUM =
|
| 180 |
+
CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING,
|
| 181 |
+
CUDNN_STATUS_VERSION_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH,
|
| 182 |
+
} cudnnStatus_t;
|
| 183 |
+
|
| 184 |
+
#define CUDNN_STATUS_FULL_ERROR_CODE(category, specific_err) ((cudnnStatus_t)(0 + (category) + (specific_err)))
|
| 185 |
+
#define CUDNN_STATUS_CATEGORY(full_error_code) ((full_error_code) / 1000 * 1000)
|
| 186 |
+
#define CUDNN_STATUS_SPECIFIC_ERROR(full_error_code) ((full_error_code) % 1000)
|
| 187 |
+
|
| 188 |
+
/* human-readable error messages */
|
| 189 |
+
const char *CUDNNWINAPI
|
| 190 |
+
cudnnGetErrorString(cudnnStatus_t status);
|
| 191 |
+
|
| 192 |
+
void CUDNNWINAPI
|
| 193 |
+
cudnnGetLastErrorString(char *message, size_t max_size);
|
| 194 |
+
|
| 195 |
+
/* Forward definition in this version only */
|
| 196 |
+
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t CUDNN_DEPRECATED;
|
| 197 |
+
|
| 198 |
+
typedef enum {
|
| 199 |
+
CUDNN_ERRQUERY_RAWCODE = 0,
|
| 200 |
+
CUDNN_ERRQUERY_NONBLOCKING = 1,
|
| 201 |
+
CUDNN_ERRQUERY_BLOCKING = 2,
|
| 202 |
+
} cudnnErrQueryMode_t;
|
| 203 |
+
|
| 204 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 205 |
+
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);
|
| 206 |
+
|
| 207 |
+
cudnnStatus_t CUDNNWINAPI
|
| 208 |
+
cudnnGetProperty(libraryPropertyType type, int *value);
|
| 209 |
+
|
| 210 |
+
cudnnStatus_t CUDNNWINAPI
|
| 211 |
+
cudnnCreate(cudnnHandle_t *handle);
|
| 212 |
+
cudnnStatus_t CUDNNWINAPI
|
| 213 |
+
cudnnDestroy(cudnnHandle_t handle);
|
| 214 |
+
cudnnStatus_t CUDNNWINAPI
|
| 215 |
+
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
|
| 216 |
+
cudnnStatus_t CUDNNWINAPI
|
| 217 |
+
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
|
| 218 |
+
/*
|
| 219 |
+
* CUDNN data type
|
| 220 |
+
*/
|
| 221 |
+
typedef enum {
|
| 222 |
+
CUDNN_DATA_FLOAT = 0,
|
| 223 |
+
CUDNN_DATA_DOUBLE = 1,
|
| 224 |
+
CUDNN_DATA_HALF = 2,
|
| 225 |
+
CUDNN_DATA_INT8 = 3,
|
| 226 |
+
CUDNN_DATA_INT32 = 4,
|
| 227 |
+
CUDNN_DATA_INT8x4 CUDNN_DEPRECATED_ENUM = 5,
|
| 228 |
+
CUDNN_DATA_UINT8 = 6,
|
| 229 |
+
CUDNN_DATA_UINT8x4 CUDNN_DEPRECATED_ENUM = 7,
|
| 230 |
+
CUDNN_DATA_INT8x32 CUDNN_DEPRECATED_ENUM = 8,
|
| 231 |
+
CUDNN_DATA_BFLOAT16 = 9,
|
| 232 |
+
CUDNN_DATA_INT64 = 10,
|
| 233 |
+
CUDNN_DATA_BOOLEAN = 11,
|
| 234 |
+
CUDNN_DATA_FP8_E4M3 = 12,
|
| 235 |
+
CUDNN_DATA_FP8_E5M2 = 13,
|
| 236 |
+
CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14,
|
| 237 |
+
CUDNN_DATA_FP8_E8M0 = 15,
|
| 238 |
+
CUDNN_DATA_FP4_E2M1 = 16,
|
| 239 |
+
} cudnnDataType_t;
|
| 240 |
+
|
| 241 |
+
/*
|
| 242 |
+
* CUDNN math type
|
| 243 |
+
*/
|
| 244 |
+
typedef enum {
|
| 245 |
+
CUDNN_DEFAULT_MATH = 0,
|
| 246 |
+
CUDNN_TENSOR_OP_MATH = 1,
|
| 247 |
+
CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
|
| 248 |
+
CUDNN_FMA_MATH = 3,
|
| 249 |
+
} cudnnMathType_t;
|
| 250 |
+
|
| 251 |
+
/*
|
| 252 |
+
* CUDNN propagate Nan
|
| 253 |
+
*/
|
| 254 |
+
typedef enum {
|
| 255 |
+
CUDNN_NOT_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 0,
|
| 256 |
+
CUDNN_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 1,
|
| 257 |
+
} cudnnNanPropagation_t;
|
| 258 |
+
|
| 259 |
+
/*
|
| 260 |
+
* Behavior for OOB samples. OOB samples are samples where L+R > T is encountered during the gradient calculation. If
|
| 261 |
+
* gradMode is set to CUDNN_CTC_SKIP_OOB_GRADIENTS, then the CTC loss function does not write to the gradient buffer for
|
| 262 |
+
* that sample. Instead, the current values, even not finite, are retained. If gradMode is set to
|
| 263 |
+
* CUDNN_CTC_ZERO_OOB_GRADIENTS, then the gradient for that sample is set to zero. This guarantees a finite gradient.
|
| 264 |
+
*/
|
| 265 |
+
typedef enum {
|
| 266 |
+
CUDNN_CTC_ZERO_OOB_GRADIENTS = 0,
|
| 267 |
+
CUDNN_CTC_SKIP_OOB_GRADIENTS = 1,
|
| 268 |
+
} cudnnCTCGradMode_t;
|
| 269 |
+
|
| 270 |
+
typedef enum {
|
| 271 |
+
CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
|
| 272 |
+
CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 )*/
|
| 273 |
+
CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is vector of element of C, vector length in data type */
|
| 274 |
+
} cudnnTensorFormat_t;
|
| 275 |
+
|
| 276 |
+
/*
|
| 277 |
+
* CUDNN ReduceTensor op type
|
| 278 |
+
*/
|
| 279 |
+
typedef enum {
|
| 280 |
+
CUDNN_REDUCE_TENSOR_ADD = 0,
|
| 281 |
+
CUDNN_REDUCE_TENSOR_MUL = 1,
|
| 282 |
+
CUDNN_REDUCE_TENSOR_MIN = 2,
|
| 283 |
+
CUDNN_REDUCE_TENSOR_MAX = 3,
|
| 284 |
+
CUDNN_REDUCE_TENSOR_AMAX = 4,
|
| 285 |
+
CUDNN_REDUCE_TENSOR_AVG = 5,
|
| 286 |
+
CUDNN_REDUCE_TENSOR_NORM1 = 6,
|
| 287 |
+
CUDNN_REDUCE_TENSOR_NORM2 = 7,
|
| 288 |
+
CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
|
| 289 |
+
} cudnnReduceTensorOp_t;
|
| 290 |
+
|
| 291 |
+
/*
|
| 292 |
+
* activation mode
|
| 293 |
+
*/
|
| 294 |
+
typedef enum {
|
| 295 |
+
CUDNN_ACTIVATION_SIGMOID = 0,
|
| 296 |
+
CUDNN_ACTIVATION_RELU = 1,
|
| 297 |
+
CUDNN_ACTIVATION_TANH = 2,
|
| 298 |
+
CUDNN_ACTIVATION_CLIPPED_RELU = 3,
|
| 299 |
+
CUDNN_ACTIVATION_ELU = 4,
|
| 300 |
+
CUDNN_ACTIVATION_IDENTITY = 5,
|
| 301 |
+
CUDNN_ACTIVATION_SWISH = 6
|
| 302 |
+
} cudnnActivationMode_t CUDNN_DEPRECATED;
|
| 303 |
+
|
| 304 |
+
typedef enum {
|
| 305 |
+
CUDNN_SEV_FATAL = 0,
|
| 306 |
+
CUDNN_SEV_ERROR = 1,
|
| 307 |
+
CUDNN_SEV_WARNING = 2,
|
| 308 |
+
CUDNN_SEV_INFO = 3,
|
| 309 |
+
} cudnnSeverity_t;
|
| 310 |
+
|
| 311 |
+
/* Message masks to be used with cudnnSetCallback() */
|
| 312 |
+
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
|
| 313 |
+
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
|
| 314 |
+
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)
|
| 315 |
+
|
| 316 |
+
/* struct containing useful informaiton for each API call */
|
| 317 |
+
typedef struct cudnnDebugStruct {
|
| 318 |
+
unsigned cudnn_version;
|
| 319 |
+
cudnnStatus_t cudnnStatus;
|
| 320 |
+
unsigned time_sec; /* epoch time in seconds */
|
| 321 |
+
unsigned time_usec; /* microseconds part of epoch time */
|
| 322 |
+
unsigned time_delta; /* time since start in seconds */
|
| 323 |
+
cudnnHandle_t handle; /* cudnn handle */
|
| 324 |
+
cudaStream_t stream; /* cuda stream ID */
|
| 325 |
+
unsigned long long pid; /* process ID */
|
| 326 |
+
unsigned long long tid; /* thread ID */
|
| 327 |
+
int cudaDeviceId; /* CUDA device ID */
|
| 328 |
+
int reserved[15]; /* reserved for future use */
|
| 329 |
+
} cudnnDebug_t;
|
| 330 |
+
|
| 331 |
+
typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);
|
| 332 |
+
|
| 333 |
+
cudnnStatus_t CUDNNWINAPI
|
| 334 |
+
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);
|
| 335 |
+
|
| 336 |
+
cudnnStatus_t CUDNNWINAPI
|
| 337 |
+
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);
|
| 338 |
+
|
| 339 |
+
/*
|
| 340 |
+
* \brief Cross-library version checker.
|
| 341 |
+
* This function is implemented differently in each sub-library. Each sublib
|
| 342 |
+
* checks whether its own version matches that of its dependencies.
|
| 343 |
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
| 344 |
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
| 345 |
+
*/
|
| 346 |
+
cudnnStatus_t CUDNNWINAPI
|
| 347 |
+
cudnnGraphVersionCheck(void);
|
| 348 |
+
|
| 349 |
+
/* Maximum supported number of tensor dimensions */
|
| 350 |
+
#define CUDNN_DIM_MAX 8
|
| 351 |
+
|
| 352 |
+
/*
|
| 353 |
+
* convolution mode
|
| 354 |
+
*/
|
| 355 |
+
typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
|
| 356 |
+
|
| 357 |
+
/*
|
| 358 |
+
* CUDNN Reorder
|
| 359 |
+
*/
|
| 360 |
+
typedef enum {
|
| 361 |
+
CUDNN_DEFAULT_REORDER = 0,
|
| 362 |
+
CUDNN_NO_REORDER = 1,
|
| 363 |
+
} cudnnReorderType_t CUDNN_DEPRECATED;
|
| 364 |
+
|
| 365 |
+
typedef void *cudnnBackendDescriptor_t;
|
| 366 |
+
|
| 367 |
+
typedef struct cudnnFractionStruct {
|
| 368 |
+
int64_t numerator;
|
| 369 |
+
int64_t denominator;
|
| 370 |
+
} cudnnFraction_t;
|
| 371 |
+
|
| 372 |
+
typedef enum {
|
| 373 |
+
CUDNN_POINTWISE_ADD = 0,
|
| 374 |
+
CUDNN_POINTWISE_ADD_SQUARE = 5,
|
| 375 |
+
CUDNN_POINTWISE_DIV = 6,
|
| 376 |
+
CUDNN_POINTWISE_MAX = 3,
|
| 377 |
+
CUDNN_POINTWISE_MIN = 2,
|
| 378 |
+
CUDNN_POINTWISE_MOD = 7,
|
| 379 |
+
CUDNN_POINTWISE_MUL = 1,
|
| 380 |
+
CUDNN_POINTWISE_POW = 8,
|
| 381 |
+
CUDNN_POINTWISE_SUB = 9,
|
| 382 |
+
|
| 383 |
+
CUDNN_POINTWISE_ABS = 10,
|
| 384 |
+
CUDNN_POINTWISE_CEIL = 11,
|
| 385 |
+
CUDNN_POINTWISE_COS = 12,
|
| 386 |
+
CUDNN_POINTWISE_EXP = 13,
|
| 387 |
+
CUDNN_POINTWISE_FLOOR = 14,
|
| 388 |
+
CUDNN_POINTWISE_LOG = 15,
|
| 389 |
+
CUDNN_POINTWISE_NEG = 16,
|
| 390 |
+
CUDNN_POINTWISE_RSQRT = 17,
|
| 391 |
+
CUDNN_POINTWISE_SIN = 18,
|
| 392 |
+
CUDNN_POINTWISE_SQRT = 4,
|
| 393 |
+
CUDNN_POINTWISE_TAN = 19,
|
| 394 |
+
CUDNN_POINTWISE_ERF = 20,
|
| 395 |
+
CUDNN_POINTWISE_IDENTITY = 21,
|
| 396 |
+
CUDNN_POINTWISE_RECIPROCAL = 22,
|
| 397 |
+
CUDNN_POINTWISE_ATAN2 = 23,
|
| 398 |
+
|
| 399 |
+
CUDNN_POINTWISE_RELU_FWD = 100,
|
| 400 |
+
CUDNN_POINTWISE_TANH_FWD = 101,
|
| 401 |
+
CUDNN_POINTWISE_SIGMOID_FWD = 102,
|
| 402 |
+
CUDNN_POINTWISE_ELU_FWD = 103,
|
| 403 |
+
CUDNN_POINTWISE_GELU_FWD = 104,
|
| 404 |
+
CUDNN_POINTWISE_SOFTPLUS_FWD = 105,
|
| 405 |
+
CUDNN_POINTWISE_SWISH_FWD = 106,
|
| 406 |
+
CUDNN_POINTWISE_GELU_APPROX_TANH_FWD = 107,
|
| 407 |
+
|
| 408 |
+
CUDNN_POINTWISE_RELU_BWD = 200,
|
| 409 |
+
CUDNN_POINTWISE_TANH_BWD = 201,
|
| 410 |
+
CUDNN_POINTWISE_SIGMOID_BWD = 202,
|
| 411 |
+
CUDNN_POINTWISE_ELU_BWD = 203,
|
| 412 |
+
CUDNN_POINTWISE_GELU_BWD = 204,
|
| 413 |
+
CUDNN_POINTWISE_SOFTPLUS_BWD = 205,
|
| 414 |
+
CUDNN_POINTWISE_SWISH_BWD = 206,
|
| 415 |
+
CUDNN_POINTWISE_GELU_APPROX_TANH_BWD = 207,
|
| 416 |
+
|
| 417 |
+
CUDNN_POINTWISE_CMP_EQ = 300,
|
| 418 |
+
CUDNN_POINTWISE_CMP_NEQ = 301,
|
| 419 |
+
CUDNN_POINTWISE_CMP_GT = 302,
|
| 420 |
+
CUDNN_POINTWISE_CMP_GE = 303,
|
| 421 |
+
CUDNN_POINTWISE_CMP_LT = 304,
|
| 422 |
+
CUDNN_POINTWISE_CMP_LE = 305,
|
| 423 |
+
|
| 424 |
+
CUDNN_POINTWISE_LOGICAL_AND = 400,
|
| 425 |
+
CUDNN_POINTWISE_LOGICAL_OR = 401,
|
| 426 |
+
CUDNN_POINTWISE_LOGICAL_NOT = 402,
|
| 427 |
+
|
| 428 |
+
CUDNN_POINTWISE_GEN_INDEX = 501,
|
| 429 |
+
|
| 430 |
+
CUDNN_POINTWISE_BINARY_SELECT = 601,
|
| 431 |
+
} cudnnPointwiseMode_t;
|
| 432 |
+
|
| 433 |
+
typedef enum {
|
| 434 |
+
CUDNN_RESAMPLE_NEAREST = 0,
|
| 435 |
+
CUDNN_RESAMPLE_BILINEAR = 1,
|
| 436 |
+
CUDNN_RESAMPLE_AVGPOOL = 2,
|
| 437 |
+
CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING = 2,
|
| 438 |
+
CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING = 4,
|
| 439 |
+
CUDNN_RESAMPLE_MAXPOOL = 3,
|
| 440 |
+
} cudnnResampleMode_t;
|
| 441 |
+
|
| 442 |
+
typedef enum {
|
| 443 |
+
CUDNN_SIGNAL_SET = 0,
|
| 444 |
+
CUDNN_SIGNAL_WAIT = 1,
|
| 445 |
+
} cudnnSignalMode_t;
|
| 446 |
+
|
| 447 |
+
typedef enum {
|
| 448 |
+
CUDNN_GENSTATS_SUM_SQSUM = 0,
|
| 449 |
+
} cudnnGenStatsMode_t;
|
| 450 |
+
|
| 451 |
+
typedef enum {
|
| 452 |
+
CUDNN_BN_FINALIZE_STATISTICS_TRAINING = 0,
|
| 453 |
+
CUDNN_BN_FINALIZE_STATISTICS_INFERENCE = 1,
|
| 454 |
+
} cudnnBnFinalizeStatsMode_t;
|
| 455 |
+
|
| 456 |
+
typedef enum {
|
| 457 |
+
CUDNN_RNG_DISTRIBUTION_BERNOULLI = 0,
|
| 458 |
+
CUDNN_RNG_DISTRIBUTION_UNIFORM = 1,
|
| 459 |
+
CUDNN_RNG_DISTRIBUTION_NORMAL = 2,
|
| 460 |
+
} cudnnRngDistribution_t;
|
| 461 |
+
|
| 462 |
+
typedef enum {
|
| 463 |
+
CUDNN_ATTR_POINTWISE_MODE = 0,
|
| 464 |
+
CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
|
| 465 |
+
CUDNN_ATTR_POINTWISE_NAN_PROPAGATION CUDNN_DEPRECATED_ENUM = 2,
|
| 466 |
+
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
|
| 467 |
+
CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,
|
| 468 |
+
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE = 5,
|
| 469 |
+
CUDNN_ATTR_POINTWISE_ELU_ALPHA = 6,
|
| 470 |
+
CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA = 7,
|
| 471 |
+
CUDNN_ATTR_POINTWISE_SWISH_BETA = 8,
|
| 472 |
+
CUDNN_ATTR_POINTWISE_AXIS = 9,
|
| 473 |
+
|
| 474 |
+
CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
|
| 475 |
+
CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
|
| 476 |
+
CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
|
| 477 |
+
CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
|
| 478 |
+
CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
|
| 479 |
+
CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
|
| 480 |
+
CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,
|
| 481 |
+
|
| 482 |
+
CUDNN_ATTR_ENGINEHEUR_MODE = 200,
|
| 483 |
+
CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
|
| 484 |
+
CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,
|
| 485 |
+
CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET = 203,
|
| 486 |
+
CUDNN_ATTR_ENGINEHEUR_DEVICEPROP = 204,
|
| 487 |
+
|
| 488 |
+
CUDNN_ATTR_ENGINECFG_ENGINE = 300,
|
| 489 |
+
CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
|
| 490 |
+
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,
|
| 491 |
+
CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE = 303,
|
| 492 |
+
CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED = 304,
|
| 493 |
+
|
| 494 |
+
CUDNN_ATTR_EXECUTION_PLAN_HANDLE CUDNN_DEPRECATED_ENUM = 400,
|
| 495 |
+
CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
|
| 496 |
+
CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
|
| 497 |
+
CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
|
| 498 |
+
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
|
| 499 |
+
CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
|
| 500 |
+
CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406,
|
| 501 |
+
CUDNN_ATTR_EXECUTION_PLAN_DEVICEPROP = 407,
|
| 502 |
+
|
| 503 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
|
| 504 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
|
| 505 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
|
| 506 |
+
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,
|
| 507 |
+
|
| 508 |
+
CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
|
| 509 |
+
CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,
|
| 510 |
+
|
| 511 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
|
| 512 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
|
| 513 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
|
| 514 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
|
| 515 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
|
| 516 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
|
| 517 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
|
| 518 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
|
| 519 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
|
| 520 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
|
| 521 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
|
| 522 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
|
| 523 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
|
| 524 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
|
| 525 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
|
| 526 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
|
| 527 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
|
| 528 |
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,
|
| 529 |
+
|
| 530 |
+
CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
|
| 531 |
+
CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
|
| 532 |
+
CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
|
| 533 |
+
CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
|
| 534 |
+
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
|
| 535 |
+
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,
|
| 536 |
+
CUDNN_ATTR_OPERATION_POINTWISE_DXDESC = 756,
|
| 537 |
+
CUDNN_ATTR_OPERATION_POINTWISE_DYDESC = 757,
|
| 538 |
+
CUDNN_ATTR_OPERATION_POINTWISE_TDESC = 758,
|
| 539 |
+
|
| 540 |
+
CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
|
| 541 |
+
CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
|
| 542 |
+
CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
|
| 543 |
+
CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
|
| 544 |
+
CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,
|
| 545 |
+
|
| 546 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE = 780,
|
| 547 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC = 781,
|
| 548 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC = 782,
|
| 549 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC = 783,
|
| 550 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC = 784,
|
| 551 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC = 785,
|
| 552 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC = 786,
|
| 553 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC = 787,
|
| 554 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC = 788,
|
| 555 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC = 789,
|
| 556 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC = 790,
|
| 557 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC = 791,
|
| 558 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC = 792,
|
| 559 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC = 793,
|
| 560 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC = 794,
|
| 561 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC = 795,
|
| 562 |
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC = 796,
|
| 563 |
+
|
| 564 |
+
CUDNN_ATTR_OPERATIONGRAPH_HANDLE CUDNN_DEPRECATED_ENUM = 800,
|
| 565 |
+
CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
|
| 566 |
+
CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
|
| 567 |
+
CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803,
|
| 568 |
+
CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY = 804,
|
| 569 |
+
|
| 570 |
+
CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
|
| 571 |
+
CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
|
| 572 |
+
CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
|
| 573 |
+
CUDNN_ATTR_TENSOR_STRIDES = 903,
|
| 574 |
+
CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
|
| 575 |
+
CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
|
| 576 |
+
CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
|
| 577 |
+
CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,
|
| 578 |
+
CUDNN_ATTR_TENSOR_IS_BY_VALUE = 908,
|
| 579 |
+
CUDNN_ATTR_TENSOR_REORDERING_MODE = 909,
|
| 580 |
+
CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC = 913,
|
| 581 |
+
|
| 582 |
+
CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
|
| 583 |
+
CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
|
| 584 |
+
CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
|
| 585 |
+
CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,
|
| 586 |
+
|
| 587 |
+
CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
|
| 588 |
+
CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,
|
| 589 |
+
|
| 590 |
+
CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
|
| 591 |
+
CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
|
| 592 |
+
CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
|
| 593 |
+
CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,
|
| 594 |
+
|
| 595 |
+
CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
|
| 596 |
+
CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
|
| 597 |
+
CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
|
| 598 |
+
CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
|
| 599 |
+
CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
|
| 600 |
+
CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305,
|
| 601 |
+
CUDNN_ATTR_ENGINE_SM_COUNT_TARGET = 1306,
|
| 602 |
+
CUDNN_ATTR_ENGINE_DEVICEPROP = 1307,
|
| 603 |
+
|
| 604 |
+
CUDNN_ATTR_MATMUL_COMP_TYPE = 1500,
|
| 605 |
+
CUDNN_ATTR_MATMUL_PADDING_VALUE = 1503,
|
| 606 |
+
|
| 607 |
+
CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520,
|
| 608 |
+
CUDNN_ATTR_OPERATION_MATMUL_BDESC = 1521,
|
| 609 |
+
CUDNN_ATTR_OPERATION_MATMUL_CDESC = 1522,
|
| 610 |
+
CUDNN_ATTR_OPERATION_MATMUL_DESC = 1523,
|
| 611 |
+
CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT CUDNN_DEPRECATED_ENUM = 1524,
|
| 612 |
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC = 1525,
|
| 613 |
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC = 1526,
|
| 614 |
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC = 1527,
|
| 615 |
+
|
| 616 |
+
CUDNN_ATTR_REDUCTION_OPERATOR = 1600,
|
| 617 |
+
CUDNN_ATTR_REDUCTION_COMP_TYPE = 1601,
|
| 618 |
+
|
| 619 |
+
CUDNN_ATTR_OPERATION_REDUCTION_XDESC = 1610,
|
| 620 |
+
CUDNN_ATTR_OPERATION_REDUCTION_YDESC = 1611,
|
| 621 |
+
CUDNN_ATTR_OPERATION_REDUCTION_DESC = 1612,
|
| 622 |
+
|
| 623 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC = 1620,
|
| 624 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC = 1621,
|
| 625 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC = 1622,
|
| 626 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC = 1623,
|
| 627 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC = 1624,
|
| 628 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC = 1625,
|
| 629 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC = 1626,
|
| 630 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC = 1627,
|
| 631 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC = 1628,
|
| 632 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC = 1629,
|
| 633 |
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS = 1630,
|
| 634 |
+
|
| 635 |
+
CUDNN_ATTR_RESAMPLE_MODE = 1700,
|
| 636 |
+
CUDNN_ATTR_RESAMPLE_COMP_TYPE = 1701,
|
| 637 |
+
CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS = 1702,
|
| 638 |
+
CUDNN_ATTR_RESAMPLE_POST_PADDINGS = 1703,
|
| 639 |
+
CUDNN_ATTR_RESAMPLE_PRE_PADDINGS = 1704,
|
| 640 |
+
CUDNN_ATTR_RESAMPLE_STRIDES = 1705,
|
| 641 |
+
CUDNN_ATTR_RESAMPLE_WINDOW_DIMS = 1706,
|
| 642 |
+
CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION = 1707,
|
| 643 |
+
CUDNN_ATTR_RESAMPLE_PADDING_MODE = 1708,
|
| 644 |
+
|
| 645 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC = 1710,
|
| 646 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC = 1711,
|
| 647 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC = 1712,
|
| 648 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA CUDNN_DEPRECATED_ENUM = 1713,
|
| 649 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA CUDNN_DEPRECATED_ENUM = 1714,
|
| 650 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC = 1716,
|
| 651 |
+
|
| 652 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC = 1720,
|
| 653 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC = 1721,
|
| 654 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC = 1722,
|
| 655 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA CUDNN_DEPRECATED_ENUM = 1723,
|
| 656 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA CUDNN_DEPRECATED_ENUM = 1724,
|
| 657 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC = 1725,
|
| 658 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC = 1726,
|
| 659 |
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC = 1727,
|
| 660 |
+
|
| 661 |
+
CUDNN_ATTR_OPERATION_CONCAT_AXIS = 1800,
|
| 662 |
+
CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS = 1801,
|
| 663 |
+
CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX = 1802,
|
| 664 |
+
CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC = 1803,
|
| 665 |
+
|
| 666 |
+
CUDNN_ATTR_OPERATION_SIGNAL_MODE = 1900,
|
| 667 |
+
CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC = 1901,
|
| 668 |
+
CUDNN_ATTR_OPERATION_SIGNAL_VALUE = 1902,
|
| 669 |
+
CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
|
| 670 |
+
CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
|
| 671 |
+
|
| 672 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950,
|
| 673 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951,
|
| 674 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952,
|
| 675 |
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953,
|
| 676 |
+
|
| 677 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
|
| 678 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
|
| 679 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
|
| 680 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC = 2003,
|
| 681 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC = 2004,
|
| 682 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC = 2005,
|
| 683 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC = 2006,
|
| 684 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC = 2007,
|
| 685 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC = 2008,
|
| 686 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC = 2009,
|
| 687 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC = 2010,
|
| 688 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC = 2011,
|
| 689 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC = 2012,
|
| 690 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_YDESC = 2013,
|
| 691 |
+
CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS = 2014,
|
| 692 |
+
|
| 693 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_MODE = 2100,
|
| 694 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_XDESC = 2101,
|
| 695 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC = 2102,
|
| 696 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC = 2103,
|
| 697 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC = 2104,
|
| 698 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC = 2105,
|
| 699 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC = 2106,
|
| 700 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC = 2107,
|
| 701 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC = 2108,
|
| 702 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC = 2109,
|
| 703 |
+
CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS = 2110,
|
| 704 |
+
|
| 705 |
+
CUDNN_ATTR_OPERATION_RESHAPE_XDESC = 2200,
|
| 706 |
+
CUDNN_ATTR_OPERATION_RESHAPE_YDESC = 2201,
|
| 707 |
+
|
| 708 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_XDESC = 2250,
|
| 709 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_YDESC = 2251,
|
| 710 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_LOWER_BANDWIDTH = 2252,
|
| 711 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_UPPER_BANDWIDTH = 2253,
|
| 712 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_AXIS = 2254,
|
| 713 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_PAD_VALUE = 2255,
|
| 714 |
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_KV_TOKEN_OFFSET_DESC = 2256,
|
| 715 |
+
|
| 716 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_XDESC = 2270,
|
| 717 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_YDESC = 2271,
|
| 718 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_LOWER_BANDWIDTH = 2272,
|
| 719 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_UPPER_BANDWIDTH = 2273,
|
| 720 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_AXIS = 2274,
|
| 721 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_PAD_VALUE = 2275,
|
| 722 |
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MAX_TOKEN_VALUE = 2276,
|
| 723 |
+
|
| 724 |
+
CUDNN_ATTR_RNG_DISTRIBUTION = 2300,
|
| 725 |
+
CUDNN_ATTR_RNG_NORMAL_DIST_MEAN = 2301,
|
| 726 |
+
CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION = 2302,
|
| 727 |
+
CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM = 2303,
|
| 728 |
+
CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM = 2304,
|
| 729 |
+
CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY = 2305,
|
| 730 |
+
|
| 731 |
+
CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
|
| 732 |
+
CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
|
| 733 |
+
CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
|
| 734 |
+
CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
|
| 735 |
+
|
| 736 |
+
CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400,
|
| 737 |
+
CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401,
|
| 738 |
+
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION = 2402,
|
| 739 |
+
|
| 740 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_XDESC = 2500,
|
| 741 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_YDESC = 2501,
|
| 742 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_SCALE_DESC = 2502,
|
| 743 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_MATH_PREC = 2503,
|
| 744 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_BLOCK_SIZE = 2504,
|
| 745 |
+
|
| 746 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_XDESC = 2600,
|
| 747 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_SCALE_DESC = 2601,
|
| 748 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_YDESC = 2602,
|
| 749 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_MATH_PREC = 2603,
|
| 750 |
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_BLOCK_SIZE = 2604,
|
| 751 |
+
|
| 752 |
+
CUDNN_ATTR_DEVICEPROP_DEVICE_ID = 2700,
|
| 753 |
+
CUDNN_ATTR_DEVICEPROP_HANDLE = 2701,
|
| 754 |
+
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION = 2702,
|
| 755 |
+
} cudnnBackendAttributeName_t;
|
| 756 |
+
|
| 757 |
+
typedef enum {
|
| 758 |
+
CUDNN_TYPE_HANDLE = 0,
|
| 759 |
+
CUDNN_TYPE_DATA_TYPE = 1,
|
| 760 |
+
CUDNN_TYPE_BOOLEAN = 2,
|
| 761 |
+
CUDNN_TYPE_INT64 = 3,
|
| 762 |
+
CUDNN_TYPE_FLOAT = 4,
|
| 763 |
+
CUDNN_TYPE_DOUBLE = 5,
|
| 764 |
+
CUDNN_TYPE_VOID_PTR = 6,
|
| 765 |
+
CUDNN_TYPE_CONVOLUTION_MODE = 7,
|
| 766 |
+
CUDNN_TYPE_HEUR_MODE = 8,
|
| 767 |
+
CUDNN_TYPE_KNOB_TYPE = 9,
|
| 768 |
+
CUDNN_TYPE_NAN_PROPOGATION CUDNN_DEPRECATED_ENUM = 10,
|
| 769 |
+
CUDNN_TYPE_NUMERICAL_NOTE = 11,
|
| 770 |
+
CUDNN_TYPE_LAYOUT_TYPE = 12,
|
| 771 |
+
CUDNN_TYPE_ATTRIB_NAME = 13,
|
| 772 |
+
CUDNN_TYPE_POINTWISE_MODE = 14,
|
| 773 |
+
CUDNN_TYPE_BACKEND_DESCRIPTOR = 15,
|
| 774 |
+
CUDNN_TYPE_GENSTATS_MODE = 16,
|
| 775 |
+
CUDNN_TYPE_BN_FINALIZE_STATS_MODE = 17,
|
| 776 |
+
CUDNN_TYPE_REDUCTION_OPERATOR_TYPE = 18,
|
| 777 |
+
CUDNN_TYPE_BEHAVIOR_NOTE = 19,
|
| 778 |
+
CUDNN_TYPE_TENSOR_REORDERING_MODE = 20,
|
| 779 |
+
CUDNN_TYPE_RESAMPLE_MODE = 21,
|
| 780 |
+
CUDNN_TYPE_PADDING_MODE = 22,
|
| 781 |
+
CUDNN_TYPE_INT32 = 23,
|
| 782 |
+
CUDNN_TYPE_CHAR = 24,
|
| 783 |
+
CUDNN_TYPE_SIGNAL_MODE = 25,
|
| 784 |
+
CUDNN_TYPE_FRACTION = 26,
|
| 785 |
+
CUDNN_TYPE_NORM_MODE = 27,
|
| 786 |
+
CUDNN_TYPE_NORM_FWD_PHASE = 28,
|
| 787 |
+
CUDNN_TYPE_RNG_DISTRIBUTION = 29,
|
| 788 |
+
} cudnnBackendAttributeType_t;
|
| 789 |
+
|
| 790 |
+
typedef enum {
|
| 791 |
+
CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0,
|
| 792 |
+
CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR = 1,
|
| 793 |
+
CUDNN_BACKEND_ENGINE_DESCRIPTOR = 2,
|
| 794 |
+
CUDNN_BACKEND_ENGINECFG_DESCRIPTOR = 3,
|
| 795 |
+
CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR = 4,
|
| 796 |
+
CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR = 5,
|
| 797 |
+
CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR = 6,
|
| 798 |
+
CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR = 7,
|
| 799 |
+
CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR = 8,
|
| 800 |
+
CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR = 9,
|
| 801 |
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR = 10,
|
| 802 |
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR = 11,
|
| 803 |
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR = 12,
|
| 804 |
+
CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR = 13,
|
| 805 |
+
CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR = 14,
|
| 806 |
+
CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR = 15,
|
| 807 |
+
CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR = 16,
|
| 808 |
+
CUDNN_BACKEND_TENSOR_DESCRIPTOR = 17,
|
| 809 |
+
CUDNN_BACKEND_MATMUL_DESCRIPTOR = 18,
|
| 810 |
+
CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR = 19,
|
| 811 |
+
CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR = 20,
|
| 812 |
+
CUDNN_BACKEND_REDUCTION_DESCRIPTOR = 21,
|
| 813 |
+
CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR = 22,
|
| 814 |
+
CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR = 23,
|
| 815 |
+
CUDNN_BACKEND_RESAMPLE_DESCRIPTOR = 24,
|
| 816 |
+
CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR = 25,
|
| 817 |
+
CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR = 26,
|
| 818 |
+
CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR = 27,
|
| 819 |
+
CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR = 28,
|
| 820 |
+
CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR = 29,
|
| 821 |
+
CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30,
|
| 822 |
+
CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31,
|
| 823 |
+
CUDNN_BACKEND_RNG_DESCRIPTOR = 32,
|
| 824 |
+
CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33,
|
| 825 |
+
CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34,
|
| 826 |
+
CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35,
|
| 827 |
+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_QUANTIZE_DESCRIPTOR = 36,
|
| 828 |
+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_DEQUANTIZE_DESCRIPTOR = 37,
|
| 829 |
+
CUDNN_BACKEND_DEVICEPROP_DESCRIPTOR = 38,
|
| 830 |
+
CUDNN_BACKEND_OPERATION_EXPAND_BAND_MATRIX_DESCRIPTOR = 39,
|
| 831 |
+
CUDNN_BACKEND_OPERATION_CONTRACT_BAND_MATRIX_DESCRIPTOR = 40,
|
| 832 |
+
} cudnnBackendDescriptorType_t;
|
| 833 |
+
|
| 834 |
+
typedef enum {
|
| 835 |
+
CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0,
|
| 836 |
+
CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS = 1,
|
| 837 |
+
CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION = 2,
|
| 838 |
+
CUDNN_NUMERICAL_NOTE_FFT = 3,
|
| 839 |
+
CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC = 4,
|
| 840 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD = 5,
|
| 841 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4 = 6,
|
| 842 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6 = 7,
|
| 843 |
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13 = 8,
|
| 844 |
+
CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP = 9,
|
| 845 |
+
CUDNN_NUMERICAL_NOTE_TYPE_COUNT = 10,
|
| 846 |
+
} cudnnBackendNumericalNote_t;
|
| 847 |
+
|
| 848 |
+
typedef enum {
|
| 849 |
+
CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
|
| 850 |
+
CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
|
| 851 |
+
CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
|
| 852 |
+
CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3,
|
| 853 |
+
CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4,
|
| 854 |
+
} cudnnBackendBehaviorNote_t;
|
| 855 |
+
|
| 856 |
+
typedef enum {
|
| 857 |
+
CUDNN_KNOB_TYPE_SPLIT_K CUDNN_DEPRECATED_ENUM = 0,
|
| 858 |
+
CUDNN_KNOB_TYPE_SWIZZLE = 1,
|
| 859 |
+
CUDNN_KNOB_TYPE_TILE_SIZE = 2,
|
| 860 |
+
CUDNN_KNOB_TYPE_USE_TEX CUDNN_DEPRECATED_ENUM = 3,
|
| 861 |
+
CUDNN_KNOB_TYPE_EDGE = 4,
|
| 862 |
+
CUDNN_KNOB_TYPE_KBLOCK CUDNN_DEPRECATED_ENUM = 5,
|
| 863 |
+
CUDNN_KNOB_TYPE_LDGA CUDNN_DEPRECATED_ENUM = 6,
|
| 864 |
+
CUDNN_KNOB_TYPE_LDGB CUDNN_DEPRECATED_ENUM = 7,
|
| 865 |
+
CUDNN_KNOB_TYPE_CHUNK_K CUDNN_DEPRECATED_ENUM = 8,
|
| 866 |
+
CUDNN_KNOB_TYPE_SPLIT_H CUDNN_DEPRECATED_ENUM = 9,
|
| 867 |
+
CUDNN_KNOB_TYPE_WINO_TILE CUDNN_DEPRECATED_ENUM = 10,
|
| 868 |
+
CUDNN_KNOB_TYPE_MULTIPLY = 11,
|
| 869 |
+
CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12,
|
| 870 |
+
CUDNN_KNOB_TYPE_TILEK = 13,
|
| 871 |
+
CUDNN_KNOB_TYPE_STAGES = 14,
|
| 872 |
+
CUDNN_KNOB_TYPE_REDUCTION_MODE = 15,
|
| 873 |
+
CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE CUDNN_DEPRECATED_ENUM = 16,
|
| 874 |
+
CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17,
|
| 875 |
+
CUDNN_KNOB_TYPE_IDX_MODE = 18,
|
| 876 |
+
CUDNN_KNOB_TYPE_SLICED CUDNN_DEPRECATED_ENUM = 19,
|
| 877 |
+
CUDNN_KNOB_TYPE_SPLIT_RS CUDNN_DEPRECATED_ENUM = 20,
|
| 878 |
+
CUDNN_KNOB_TYPE_SINGLEBUFFER CUDNN_DEPRECATED_ENUM = 21,
|
| 879 |
+
CUDNN_KNOB_TYPE_LDGC CUDNN_DEPRECATED_ENUM = 22,
|
| 880 |
+
CUDNN_KNOB_TYPE_SPECFILT = 23,
|
| 881 |
+
CUDNN_KNOB_TYPE_KERNEL_CFG = 24,
|
| 882 |
+
CUDNN_KNOB_TYPE_WORKSPACE = 25,
|
| 883 |
+
CUDNN_KNOB_TYPE_TILE_CGA CUDNN_DEPRECATED_ENUM = 26,
|
| 884 |
+
CUDNN_KNOB_TYPE_TILE_CGA_M = 27,
|
| 885 |
+
CUDNN_KNOB_TYPE_TILE_CGA_N = 28,
|
| 886 |
+
CUDNN_KNOB_TYPE_BLOCK_SIZE = 29,
|
| 887 |
+
CUDNN_KNOB_TYPE_OCCUPANCY = 30,
|
| 888 |
+
CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD = 31,
|
| 889 |
+
CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK CUDNN_DEPRECATED_ENUM = 32,
|
| 890 |
+
CUDNN_KNOB_TYPE_SPLIT_COLS = 33,
|
| 891 |
+
CUDNN_KNOB_TYPE_TILE_ROWS = 34,
|
| 892 |
+
CUDNN_KNOB_TYPE_TILE_COLS = 35,
|
| 893 |
+
CUDNN_KNOB_TYPE_LOAD_SIZE = 36,
|
| 894 |
+
CUDNN_KNOB_TYPE_CTA_COUNT = 37,
|
| 895 |
+
CUDNN_KNOB_TYPE_STREAM_K = 38,
|
| 896 |
+
CUDNN_KNOB_TYPE_SPLIT_P_SLC = 39,
|
| 897 |
+
CUDNN_KNOB_TYPE_TILE_M = 40,
|
| 898 |
+
CUDNN_KNOB_TYPE_TILE_N = 41,
|
| 899 |
+
CUDNN_KNOB_TYPE_WARP_SPEC_CFG = 42,
|
| 900 |
+
CUDNN_KNOB_TYPE_COUNTS = 43,
|
| 901 |
+
} cudnnBackendKnobType_t;
|
| 902 |
+
|
| 903 |
+
typedef enum {
|
| 904 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0,
|
| 905 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1,
|
| 906 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
|
| 907 |
+
CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
|
| 908 |
+
CUDNN_LAYOUT_TYPE_COUNT = 4,
|
| 909 |
+
} cudnnBackendLayoutType_t;
|
| 910 |
+
|
| 911 |
+
typedef enum {
|
| 912 |
+
CUDNN_HEUR_MODE_INSTANT = 0,
|
| 913 |
+
CUDNN_HEUR_MODE_B = 1,
|
| 914 |
+
CUDNN_HEUR_MODE_FALLBACK = 2,
|
| 915 |
+
CUDNN_HEUR_MODE_A = 3,
|
| 916 |
+
CUDNN_HEUR_MODES_COUNT = 4,
|
| 917 |
+
} cudnnBackendHeurMode_t;
|
| 918 |
+
|
| 919 |
+
typedef enum {
|
| 920 |
+
CUDNN_TENSOR_REORDERING_NONE = 0,
|
| 921 |
+
CUDNN_TENSOR_REORDERING_INT8x32 = 1,
|
| 922 |
+
CUDNN_TENSOR_REORDERING_F16x16 = 2,
|
| 923 |
+
CUDNN_TENSOR_REORDERING_F8_128x4 = 3,
|
| 924 |
+
} cudnnBackendTensorReordering_t;
|
| 925 |
+
|
| 926 |
+
typedef enum {
|
| 927 |
+
CUDNN_ZERO_PAD = 0,
|
| 928 |
+
CUDNN_NEG_INF_PAD = 1,
|
| 929 |
+
CUDNN_EDGE_VAL_PAD = 2,
|
| 930 |
+
} cudnnPaddingMode_t;
|
| 931 |
+
|
| 932 |
+
typedef enum {
|
| 933 |
+
CUDNN_LAYER_NORM = 0,
|
| 934 |
+
CUDNN_INSTANCE_NORM = 1,
|
| 935 |
+
CUDNN_BATCH_NORM = 2,
|
| 936 |
+
CUDNN_GROUP_NORM = 3,
|
| 937 |
+
CUDNN_RMS_NORM = 4,
|
| 938 |
+
CUDNN_ADA_LAYER_NORM = 5,
|
| 939 |
+
} cudnnBackendNormMode_t;
|
| 940 |
+
|
| 941 |
+
typedef enum {
|
| 942 |
+
CUDNN_NORM_FWD_INFERENCE = 0,
|
| 943 |
+
CUDNN_NORM_FWD_TRAINING = 1,
|
| 944 |
+
} cudnnBackendNormFwdPhase_t;
|
| 945 |
+
|
| 946 |
+
cudnnStatus_t CUDNNWINAPI
|
| 947 |
+
cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t *descriptor);
|
| 948 |
+
|
| 949 |
+
cudnnStatus_t CUDNNWINAPI
|
| 950 |
+
cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor);
|
| 951 |
+
|
| 952 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 953 |
+
cudnnBackendInitialize(cudnnBackendDescriptor_t descriptor);
|
| 954 |
+
|
| 955 |
+
cudnnStatus_t CUDNNWINAPI
|
| 956 |
+
cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor);
|
| 957 |
+
|
| 958 |
+
cudnnStatus_t CUDNNWINAPI
|
| 959 |
+
cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor,
|
| 960 |
+
cudnnBackendAttributeName_t attributeName,
|
| 961 |
+
cudnnBackendAttributeType_t attributeType,
|
| 962 |
+
int64_t elementCount,
|
| 963 |
+
const void *arrayOfElements);
|
| 964 |
+
|
| 965 |
+
cudnnStatus_t CUDNNWINAPI
|
| 966 |
+
cudnnBackendGetAttribute(cudnnBackendDescriptor_t const descriptor,
|
| 967 |
+
cudnnBackendAttributeName_t attributeName,
|
| 968 |
+
cudnnBackendAttributeType_t attributeType,
|
| 969 |
+
int64_t requestedElementCount,
|
| 970 |
+
int64_t *elementCount,
|
| 971 |
+
void *arrayOfElements);
|
| 972 |
+
|
| 973 |
+
cudnnStatus_t CUDNNWINAPI
|
| 974 |
+
cudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
|
| 975 |
+
|
| 976 |
+
cudnnStatus_t CUDNNWINAPI
|
| 977 |
+
cudnnBackendPopulateCudaGraph(cudnnHandle_t handle,
|
| 978 |
+
cudnnBackendDescriptor_t executionPlan,
|
| 979 |
+
cudnnBackendDescriptor_t variantPack,
|
| 980 |
+
cudaGraph_t graph);
|
| 981 |
+
|
| 982 |
+
cudnnStatus_t CUDNNWINAPI
|
| 983 |
+
cudnnBackendUpdateCudaGraph(cudnnHandle_t handle,
|
| 984 |
+
cudnnBackendDescriptor_t executionPlan,
|
| 985 |
+
cudnnBackendDescriptor_t variantPack,
|
| 986 |
+
cudaGraph_t graph);
|
| 987 |
+
|
| 988 |
+
#if defined(__cplusplus)
|
| 989 |
+
}
|
| 990 |
+
#endif
|
| 991 |
+
|
| 992 |
+
#endif /* CUDNN_GRAPH_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_ops.h
ADDED
|
@@ -0,0 +1,1316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_ops : cuDNN's basic definitions and basic operations.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_OPS_H_)
|
| 55 |
+
#define CUDNN_OPS_H_
|
| 56 |
+
|
| 57 |
+
#include <stdint.h>
|
| 58 |
+
|
| 59 |
+
#include "cudnn_version.h"
|
| 60 |
+
#include "cudnn_graph.h"
|
| 61 |
+
|
| 62 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 63 |
+
#define CUDNN_OPS_MAJOR 9
|
| 64 |
+
#define CUDNN_OPS_MINOR 10
|
| 65 |
+
#define CUDNN_OPS_PATCH 2
|
| 66 |
+
|
| 67 |
+
#if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
|
| 68 |
+
#error Version mismatch in cuDNN OPS INFER!!!
|
| 69 |
+
#endif
|
| 70 |
+
|
| 71 |
+
#if defined(__cplusplus)
|
| 72 |
+
extern "C" {
|
| 73 |
+
#endif
|
| 74 |
+
|
| 75 |
+
/* Data structures to represent Image/Filter and the Neural Network Layer */
|
| 76 |
+
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
|
| 77 |
+
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
|
| 78 |
+
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
|
| 79 |
+
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
|
| 80 |
+
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
|
| 81 |
+
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
|
| 82 |
+
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
|
| 83 |
+
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
|
| 84 |
+
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
|
| 85 |
+
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;
|
| 86 |
+
/*
|
| 87 |
+
* CUDNN Determinism
|
| 88 |
+
*/
|
| 89 |
+
typedef enum {
|
| 90 |
+
CUDNN_NON_DETERMINISTIC = 0,
|
| 91 |
+
CUDNN_DETERMINISTIC = 1,
|
| 92 |
+
} cudnnDeterminism_t;
|
| 93 |
+
|
| 94 |
+
/* Create an instance of a generic Tensor descriptor */
|
| 95 |
+
cudnnStatus_t CUDNNWINAPI
|
| 96 |
+
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
|
| 97 |
+
|
| 98 |
+
cudnnStatus_t CUDNNWINAPI
|
| 99 |
+
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
|
| 100 |
+
cudnnTensorFormat_t format,
|
| 101 |
+
cudnnDataType_t dataType, /* image data type */
|
| 102 |
+
int n, /* number of inputs (batch size) */
|
| 103 |
+
int c, /* number of input feature maps */
|
| 104 |
+
int h, /* height of input section */
|
| 105 |
+
int w); /* width of input section */
|
| 106 |
+
|
| 107 |
+
cudnnStatus_t CUDNNWINAPI
|
| 108 |
+
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
|
| 109 |
+
cudnnDataType_t dataType, /* image data type */
|
| 110 |
+
int n, /* number of inputs (batch size) */
|
| 111 |
+
int c, /* number of input feature maps */
|
| 112 |
+
int h, /* height of input section */
|
| 113 |
+
int w, /* width of input section */
|
| 114 |
+
int nStride,
|
| 115 |
+
int cStride,
|
| 116 |
+
int hStride,
|
| 117 |
+
int wStride);
|
| 118 |
+
|
| 119 |
+
cudnnStatus_t CUDNNWINAPI
|
| 120 |
+
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
|
| 121 |
+
cudnnDataType_t *dataType, /* image data type */
|
| 122 |
+
int *n, /* number of inputs (batch size) */
|
| 123 |
+
int *c, /* number of input feature maps */
|
| 124 |
+
int *h, /* height of input section */
|
| 125 |
+
int *w, /* width of input section */
|
| 126 |
+
int *nStride,
|
| 127 |
+
int *cStride,
|
| 128 |
+
int *hStride,
|
| 129 |
+
int *wStride);
|
| 130 |
+
|
| 131 |
+
cudnnStatus_t CUDNNWINAPI
|
| 132 |
+
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
|
| 133 |
+
cudnnDataType_t dataType,
|
| 134 |
+
int nbDims,
|
| 135 |
+
const int dimA[],
|
| 136 |
+
const int strideA[]);
|
| 137 |
+
|
| 138 |
+
cudnnStatus_t CUDNNWINAPI
|
| 139 |
+
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
|
| 140 |
+
cudnnTensorFormat_t format,
|
| 141 |
+
cudnnDataType_t dataType,
|
| 142 |
+
int nbDims,
|
| 143 |
+
const int dimA[]);
|
| 144 |
+
|
| 145 |
+
cudnnStatus_t CUDNNWINAPI
|
| 146 |
+
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
|
| 147 |
+
int nbDimsRequested,
|
| 148 |
+
cudnnDataType_t *dataType,
|
| 149 |
+
int *nbDims,
|
| 150 |
+
int dimA[],
|
| 151 |
+
int strideA[]);
|
| 152 |
+
|
| 153 |
+
cudnnStatus_t CUDNNWINAPI
|
| 154 |
+
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);
|
| 155 |
+
|
| 156 |
+
/* PixelOffset( n, c, h, w ) = n *input_stride + c * feature_stride + h * h_stride + w * w_stride
|
| 157 |
+
|
| 158 |
+
1)Example of all images in row major order one batch of features after the other (with an optional padding on row)
|
| 159 |
+
input_stride : c x h x h_stride
|
| 160 |
+
feature_stride : h x h_stride
|
| 161 |
+
h_stride : >= w ( h_stride = w if no padding)
|
| 162 |
+
w_stride : 1
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
2)Example of all images in row major with features maps interleaved
|
| 166 |
+
input_stride : c x h x h_stride
|
| 167 |
+
feature_stride : 1
|
| 168 |
+
h_stride : w x c
|
| 169 |
+
w_stride : c
|
| 170 |
+
|
| 171 |
+
3)Example of all images in column major order one batch of features after the other (with optional padding on column)
|
| 172 |
+
input_stride : c x w x w_stride
|
| 173 |
+
feature_stride : w x w_stride
|
| 174 |
+
h_stride : 1
|
| 175 |
+
w_stride : >= h
|
| 176 |
+
|
| 177 |
+
*/
|
| 178 |
+
|
| 179 |
+
/* Destroy an instance of Tensor4d descriptor */
|
| 180 |
+
cudnnStatus_t CUDNNWINAPI
|
| 181 |
+
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
|
| 182 |
+
|
| 183 |
+
/* Fold/unfold transforms */
|
| 184 |
+
typedef enum {
|
| 185 |
+
CUDNN_TRANSFORM_FOLD = 0U,
|
| 186 |
+
CUDNN_TRANSFORM_UNFOLD = 1U,
|
| 187 |
+
} cudnnFoldingDirection_t;
|
| 188 |
+
|
| 189 |
+
/** Create a destination descriptor for cudnnTransformTensor */
|
| 190 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 191 |
+
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
|
| 192 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 193 |
+
cudnnTensorDescriptor_t destDesc,
|
| 194 |
+
size_t *destSizeInBytes);
|
| 195 |
+
|
| 196 |
+
/** Create an empty tensor transform descriptor */
|
| 197 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 198 |
+
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);
|
| 199 |
+
|
| 200 |
+
/** Initialize a previously created tensor transform descriptor. */
|
| 201 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 202 |
+
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
|
| 203 |
+
const uint32_t nbDims,
|
| 204 |
+
const cudnnTensorFormat_t destFormat,
|
| 205 |
+
const int32_t padBeforeA[],
|
| 206 |
+
const int32_t padAfterA[],
|
| 207 |
+
const uint32_t foldA[],
|
| 208 |
+
const cudnnFoldingDirection_t direction);
|
| 209 |
+
|
| 210 |
+
/**
|
| 211 |
+
* Retrieves the values stored in a previously initialized tensor transform
|
| 212 |
+
* descriptor.
|
| 213 |
+
*/
|
| 214 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 215 |
+
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
|
| 216 |
+
uint32_t nbDimsRequested,
|
| 217 |
+
cudnnTensorFormat_t *destFormat,
|
| 218 |
+
int32_t padBeforeA[],
|
| 219 |
+
int32_t padAfterA[],
|
| 220 |
+
uint32_t foldA[],
|
| 221 |
+
cudnnFoldingDirection_t *direction);
|
| 222 |
+
|
| 223 |
+
/**
|
| 224 |
+
* Destroys a previously created tensor transform descriptor.
|
| 225 |
+
*/
|
| 226 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 227 |
+
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
|
| 228 |
+
|
| 229 |
+
/* Tensor layout conversion helper (y = alpha * x + beta * y) */
|
| 230 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 231 |
+
cudnnTransformTensor(cudnnHandle_t handle,
|
| 232 |
+
const void *alpha,
|
| 233 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 234 |
+
const void *x,
|
| 235 |
+
const void *beta,
|
| 236 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 237 |
+
void *y);
|
| 238 |
+
|
| 239 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 240 |
+
cudnnTransformTensorEx(cudnnHandle_t handle,
|
| 241 |
+
const cudnnTensorTransformDescriptor_t transDesc,
|
| 242 |
+
const void *alpha,
|
| 243 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 244 |
+
const void *srcData,
|
| 245 |
+
const void *beta,
|
| 246 |
+
const cudnnTensorDescriptor_t destDesc,
|
| 247 |
+
void *destData);
|
| 248 |
+
|
| 249 |
+
/* Tensor Bias addition : C = alpha * A + beta * C */
|
| 250 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 251 |
+
cudnnAddTensor(cudnnHandle_t handle,
|
| 252 |
+
const void *alpha,
|
| 253 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 254 |
+
const void *A,
|
| 255 |
+
const void *beta,
|
| 256 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 257 |
+
void *C);
|
| 258 |
+
|
| 259 |
+
/*
|
| 260 |
+
* CUDNN OpTensor op type
|
| 261 |
+
*/
|
| 262 |
+
typedef enum {
|
| 263 |
+
CUDNN_OP_TENSOR_ADD = 0,
|
| 264 |
+
CUDNN_OP_TENSOR_MUL = 1,
|
| 265 |
+
CUDNN_OP_TENSOR_MIN = 2,
|
| 266 |
+
CUDNN_OP_TENSOR_MAX = 3,
|
| 267 |
+
CUDNN_OP_TENSOR_SQRT = 4,
|
| 268 |
+
CUDNN_OP_TENSOR_NOT = 5,
|
| 269 |
+
} cudnnOpTensorOp_t;
|
| 270 |
+
|
| 271 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 272 |
+
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);
|
| 273 |
+
|
| 274 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 275 |
+
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
|
| 276 |
+
cudnnOpTensorOp_t opTensorOp,
|
| 277 |
+
cudnnDataType_t opTensorCompType,
|
| 278 |
+
cudnnNanPropagation_t opTensorNanOpt);
|
| 279 |
+
|
| 280 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 281 |
+
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
|
| 282 |
+
cudnnOpTensorOp_t *opTensorOp,
|
| 283 |
+
cudnnDataType_t *opTensorCompType,
|
| 284 |
+
cudnnNanPropagation_t *opTensorNanOpt);
|
| 285 |
+
|
| 286 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 287 |
+
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
|
| 288 |
+
|
| 289 |
+
/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
|
| 290 |
+
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
|
| 291 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 292 |
+
cudnnOpTensor(cudnnHandle_t handle,
|
| 293 |
+
const cudnnOpTensorDescriptor_t opTensorDesc,
|
| 294 |
+
const void *alpha1,
|
| 295 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 296 |
+
const void *A,
|
| 297 |
+
const void *alpha2,
|
| 298 |
+
const cudnnTensorDescriptor_t bDesc,
|
| 299 |
+
const void *B,
|
| 300 |
+
const void *beta,
|
| 301 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 302 |
+
void *C);
|
| 303 |
+
|
| 304 |
+
/*
|
| 305 |
+
* CUDNN ReduceTensor indices type
|
| 306 |
+
*/
|
| 307 |
+
typedef enum {
|
| 308 |
+
CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
|
| 309 |
+
CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
|
| 310 |
+
} cudnnReduceTensorIndices_t CUDNN_DEPRECATED;
|
| 311 |
+
|
| 312 |
+
/*
|
| 313 |
+
* CUDNN tensor indices type size (all unsigned)
|
| 314 |
+
* Currently not supported, default is 32 bit unsigned.
|
| 315 |
+
*/
|
| 316 |
+
typedef enum {
|
| 317 |
+
CUDNN_32BIT_INDICES = 0,
|
| 318 |
+
CUDNN_64BIT_INDICES = 1,
|
| 319 |
+
CUDNN_16BIT_INDICES = 2,
|
| 320 |
+
CUDNN_8BIT_INDICES = 3,
|
| 321 |
+
} cudnnIndicesType_t CUDNN_DEPRECATED;
|
| 322 |
+
|
| 323 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 324 |
+
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);
|
| 325 |
+
|
| 326 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 327 |
+
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 328 |
+
cudnnReduceTensorOp_t reduceTensorOp,
|
| 329 |
+
cudnnDataType_t reduceTensorCompType,
|
| 330 |
+
cudnnNanPropagation_t reduceTensorNanOpt,
|
| 331 |
+
cudnnReduceTensorIndices_t reduceTensorIndices,
|
| 332 |
+
cudnnIndicesType_t reduceTensorIndicesType);
|
| 333 |
+
|
| 334 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 336 |
+
cudnnReduceTensorOp_t *reduceTensorOp,
|
| 337 |
+
cudnnDataType_t *reduceTensorCompType,
|
| 338 |
+
cudnnNanPropagation_t *reduceTensorNanOpt,
|
| 339 |
+
cudnnReduceTensorIndices_t *reduceTensorIndices,
|
| 340 |
+
cudnnIndicesType_t *reduceTensorIndicesType);
|
| 341 |
+
|
| 342 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 343 |
+
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
|
| 344 |
+
|
| 345 |
+
/* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
|
| 346 |
+
* output tensors */
|
| 347 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 348 |
+
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
|
| 349 |
+
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 350 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 351 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 352 |
+
size_t *sizeInBytes);
|
| 353 |
+
|
| 354 |
+
/* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
|
| 355 |
+
* tensors */
|
| 356 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 357 |
+
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
|
| 358 |
+
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 359 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 360 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 361 |
+
size_t *sizeInBytes);
|
| 362 |
+
|
| 363 |
+
/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
|
| 364 |
+
/* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
|
| 365 |
+
/* The indices space is ignored for reduce ops other than min or max. */
|
| 366 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 367 |
+
cudnnReduceTensor(cudnnHandle_t handle,
|
| 368 |
+
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 369 |
+
void *indices,
|
| 370 |
+
size_t indicesSizeInBytes,
|
| 371 |
+
void *workspace,
|
| 372 |
+
size_t workspaceSizeInBytes,
|
| 373 |
+
const void *alpha,
|
| 374 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 375 |
+
const void *A,
|
| 376 |
+
const void *beta,
|
| 377 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 378 |
+
void *C);
|
| 379 |
+
|
| 380 |
+
/* Set all values of a tensor to a given value : y[i] = value[0] */
|
| 381 |
+
cudnnStatus_t CUDNNWINAPI
|
| 382 |
+
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);
|
| 383 |
+
|
| 384 |
+
/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
|
| 385 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 386 |
+
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);
|
| 387 |
+
|
| 388 |
+
/* Create an instance of FilterStruct */
|
| 389 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 390 |
+
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
|
| 391 |
+
|
| 392 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 393 |
+
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
|
| 394 |
+
cudnnDataType_t dataType, /* image data type */
|
| 395 |
+
cudnnTensorFormat_t format,
|
| 396 |
+
int k, /* number of output feature maps */
|
| 397 |
+
int c, /* number of input feature maps */
|
| 398 |
+
int h, /* height of each input filter */
|
| 399 |
+
int w); /* width of each input filter */
|
| 400 |
+
|
| 401 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 402 |
+
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
|
| 403 |
+
cudnnDataType_t *dataType, /* image data type */
|
| 404 |
+
cudnnTensorFormat_t *format,
|
| 405 |
+
int *k, /* number of output feature maps */
|
| 406 |
+
int *c, /* number of input feature maps */
|
| 407 |
+
int *h, /* height of each input filter */
|
| 408 |
+
int *w); /* width of each input filter */
|
| 409 |
+
|
| 410 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 411 |
+
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
|
| 412 |
+
cudnnDataType_t dataType, /* image data type */
|
| 413 |
+
cudnnTensorFormat_t format,
|
| 414 |
+
int nbDims,
|
| 415 |
+
const int filterDimA[]);
|
| 416 |
+
|
| 417 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 418 |
+
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
|
| 419 |
+
int nbDimsRequested,
|
| 420 |
+
cudnnDataType_t *dataType, /* image data type */
|
| 421 |
+
cudnnTensorFormat_t *format,
|
| 422 |
+
int *nbDims,
|
| 423 |
+
int filterDimA[]);
|
| 424 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 425 |
+
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);
|
| 426 |
+
|
| 427 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 428 |
+
cudnnTransformFilter(cudnnHandle_t handle,
|
| 429 |
+
const cudnnTensorTransformDescriptor_t transDesc,
|
| 430 |
+
const void *alpha,
|
| 431 |
+
const cudnnFilterDescriptor_t srcDesc,
|
| 432 |
+
const void *srcData,
|
| 433 |
+
const void *beta,
|
| 434 |
+
const cudnnFilterDescriptor_t destDesc,
|
| 435 |
+
void *destData);
|
| 436 |
+
|
| 437 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 438 |
+
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
|
| 439 |
+
|
| 440 |
+
/*
|
| 441 |
+
* softmax algorithm
|
| 442 |
+
*/
|
| 443 |
+
typedef enum {
|
| 444 |
+
CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
|
| 445 |
+
CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
|
| 446 |
+
CUDNN_SOFTMAX_LOG = 2
|
| 447 |
+
} cudnnSoftmaxAlgorithm_t;
|
| 448 |
+
|
| 449 |
+
typedef enum {
|
| 450 |
+
CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
|
| 451 |
+
CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
|
| 452 |
+
} cudnnSoftmaxMode_t;
|
| 453 |
+
|
| 454 |
+
/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 455 |
+
|
| 456 |
+
/* Function to perform forward softmax */
|
| 457 |
+
cudnnStatus_t CUDNNWINAPI
|
| 458 |
+
cudnnSoftmaxForward(cudnnHandle_t handle,
|
| 459 |
+
cudnnSoftmaxAlgorithm_t algo,
|
| 460 |
+
cudnnSoftmaxMode_t mode,
|
| 461 |
+
const void *alpha,
|
| 462 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 463 |
+
const void *x,
|
| 464 |
+
const void *beta,
|
| 465 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 466 |
+
void *y);
|
| 467 |
+
|
| 468 |
+
/*
|
| 469 |
+
* pooling mode
|
| 470 |
+
*/
|
| 471 |
+
typedef enum {
|
| 472 |
+
CUDNN_POOLING_MAX = 0,
|
| 473 |
+
CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
|
| 474 |
+
CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
|
| 475 |
+
CUDNN_POOLING_MAX_DETERMINISTIC = 3
|
| 476 |
+
} cudnnPoolingMode_t CUDNN_DEPRECATED;
|
| 477 |
+
|
| 478 |
+
/* Create an instance of pooling descriptor */
|
| 479 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 480 |
+
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
|
| 481 |
+
|
| 482 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 483 |
+
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
|
| 484 |
+
cudnnPoolingMode_t mode,
|
| 485 |
+
cudnnNanPropagation_t maxpoolingNanOpt,
|
| 486 |
+
int windowHeight,
|
| 487 |
+
int windowWidth,
|
| 488 |
+
int verticalPadding,
|
| 489 |
+
int horizontalPadding,
|
| 490 |
+
int verticalStride,
|
| 491 |
+
int horizontalStride);
|
| 492 |
+
|
| 493 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 494 |
+
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
|
| 495 |
+
cudnnPoolingMode_t *mode,
|
| 496 |
+
cudnnNanPropagation_t *maxpoolingNanOpt,
|
| 497 |
+
int *windowHeight,
|
| 498 |
+
int *windowWidth,
|
| 499 |
+
int *verticalPadding,
|
| 500 |
+
int *horizontalPadding,
|
| 501 |
+
int *verticalStride,
|
| 502 |
+
int *horizontalStride);
|
| 503 |
+
|
| 504 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 505 |
+
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
|
| 506 |
+
const cudnnPoolingMode_t mode,
|
| 507 |
+
const cudnnNanPropagation_t maxpoolingNanOpt,
|
| 508 |
+
int nbDims,
|
| 509 |
+
const int windowDimA[],
|
| 510 |
+
const int paddingA[],
|
| 511 |
+
const int strideA[]);
|
| 512 |
+
|
| 513 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 514 |
+
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
|
| 515 |
+
int nbDimsRequested,
|
| 516 |
+
cudnnPoolingMode_t *mode,
|
| 517 |
+
cudnnNanPropagation_t *maxpoolingNanOpt,
|
| 518 |
+
int *nbDims,
|
| 519 |
+
int windowDimA[],
|
| 520 |
+
int paddingA[],
|
| 521 |
+
int strideA[]);
|
| 522 |
+
|
| 523 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 524 |
+
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
|
| 525 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 526 |
+
int nbDims,
|
| 527 |
+
int outputTensorDimA[]);
|
| 528 |
+
|
| 529 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 530 |
+
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
|
| 531 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 532 |
+
int *n,
|
| 533 |
+
int *c,
|
| 534 |
+
int *h,
|
| 535 |
+
int *w);
|
| 536 |
+
|
| 537 |
+
/* Destroy an instance of pooling descriptor */
|
| 538 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 539 |
+
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
|
| 540 |
+
|
| 541 |
+
/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 542 |
+
|
| 543 |
+
/* Function to perform forward pooling */
|
| 544 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 545 |
+
cudnnPoolingForward(cudnnHandle_t handle,
|
| 546 |
+
const cudnnPoolingDescriptor_t poolingDesc,
|
| 547 |
+
const void *alpha,
|
| 548 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 549 |
+
const void *x,
|
| 550 |
+
const void *beta,
|
| 551 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 552 |
+
void *y);
|
| 553 |
+
|
| 554 |
+
/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 555 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 556 |
+
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);
|
| 557 |
+
|
| 558 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 559 |
+
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
|
| 560 |
+
cudnnActivationMode_t mode,
|
| 561 |
+
cudnnNanPropagation_t reluNanOpt,
|
| 562 |
+
double coef); /* ceiling for clipped RELU, alpha for ELU */
|
| 563 |
+
|
| 564 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 565 |
+
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
|
| 566 |
+
cudnnActivationMode_t *mode,
|
| 567 |
+
cudnnNanPropagation_t *reluNanOpt,
|
| 568 |
+
double *coef); /* ceiling for clipped RELU, alpha for ELU */
|
| 569 |
+
|
| 570 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 571 |
+
cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);
|
| 572 |
+
|
| 573 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 574 |
+
cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);
|
| 575 |
+
|
| 576 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 577 |
+
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
|
| 578 |
+
|
| 579 |
+
/* Function to perform forward activation */
|
| 580 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 581 |
+
cudnnActivationForward(cudnnHandle_t handle,
|
| 582 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 583 |
+
const void *alpha,
|
| 584 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 585 |
+
const void *x,
|
| 586 |
+
const void *beta,
|
| 587 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 588 |
+
void *y);
|
| 589 |
+
|
| 590 |
+
/*
|
| 591 |
+
* Create an instance of LRN (Local Response Normalization) descriptor
|
| 592 |
+
* Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
|
| 593 |
+
*/
|
| 594 |
+
cudnnStatus_t CUDNNWINAPI
|
| 595 |
+
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);
|
| 596 |
+
|
| 597 |
+
#define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
|
| 598 |
+
#define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
|
| 599 |
+
#define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
|
| 600 |
+
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */
|
| 601 |
+
|
| 602 |
+
/* LRN layer mode */
|
| 603 |
+
typedef enum {
|
| 604 |
+
CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
|
| 605 |
+
} cudnnLRNMode_t;
|
| 606 |
+
|
| 607 |
+
/*
|
| 608 |
+
* Uses a window [center-lookBehind, center+lookAhead], where
|
| 609 |
+
* lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
|
| 610 |
+
* Values of double parameters cast to tensor data type.
|
| 611 |
+
*/
|
| 612 |
+
cudnnStatus_t CUDNNWINAPI
|
| 613 |
+
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
|
| 614 |
+
/*
|
| 615 |
+
* Retrieve the settings currently stored in an LRN layer descriptor
|
| 616 |
+
* Any of the provided pointers can be NULL (no corresponding value will be returned)
|
| 617 |
+
*/
|
| 618 |
+
cudnnStatus_t CUDNNWINAPI
|
| 619 |
+
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);
|
| 620 |
+
|
| 621 |
+
/* Destroy an instance of LRN descriptor */
|
| 622 |
+
cudnnStatus_t CUDNNWINAPI
|
| 623 |
+
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
|
| 624 |
+
|
| 625 |
+
/* LRN functions: output = alpha * normalize(x) + beta * old_y */
|
| 626 |
+
|
| 627 |
+
/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
|
| 628 |
+
cudnnStatus_t CUDNNWINAPI
|
| 629 |
+
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
|
| 630 |
+
cudnnLRNDescriptor_t normDesc,
|
| 631 |
+
cudnnLRNMode_t lrnMode,
|
| 632 |
+
const void *alpha,
|
| 633 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 634 |
+
const void *x,
|
| 635 |
+
const void *beta,
|
| 636 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 637 |
+
void *y);
|
| 638 |
+
|
| 639 |
+
typedef enum {
|
| 640 |
+
CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
|
| 641 |
+
} cudnnDivNormMode_t;
|
| 642 |
+
|
| 643 |
+
/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
|
| 644 |
+
cudnnStatus_t CUDNNWINAPI
|
| 645 |
+
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
|
| 646 |
+
cudnnLRNDescriptor_t normDesc,
|
| 647 |
+
cudnnDivNormMode_t mode,
|
| 648 |
+
const void *alpha,
|
| 649 |
+
const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
|
| 650 |
+
const void *x,
|
| 651 |
+
const void *means, /* if NULL, means are assumed to be zero */
|
| 652 |
+
void *temp,
|
| 653 |
+
void *temp2,
|
| 654 |
+
const void *beta,
|
| 655 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 656 |
+
void *y);
|
| 657 |
+
|
| 658 |
+
typedef enum {
|
| 659 |
+
/* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
|
| 660 |
+
CUDNN_BATCHNORM_PER_ACTIVATION = 0,
|
| 661 |
+
|
| 662 |
+
/* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
|
| 663 |
+
CUDNN_BATCHNORM_SPATIAL = 1,
|
| 664 |
+
|
| 665 |
+
/*
|
| 666 |
+
* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
|
| 667 |
+
* May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
|
| 668 |
+
*/
|
| 669 |
+
CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
|
| 670 |
+
} cudnnBatchNormMode_t CUDNN_DEPRECATED;
|
| 671 |
+
|
| 672 |
+
#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */
|
| 673 |
+
|
| 674 |
+
/*
|
| 675 |
+
* Derives a tensor descriptor from layer data descriptor for BatchNormalization
|
| 676 |
+
* scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
|
| 677 |
+
* bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
|
| 678 |
+
*/
|
| 679 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 680 |
+
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
|
| 681 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 682 |
+
cudnnBatchNormMode_t mode);
|
| 683 |
+
|
| 684 |
+
typedef enum {
|
| 685 |
+
CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
|
| 686 |
+
CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
|
| 687 |
+
CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
|
| 688 |
+
} cudnnBatchNormOps_t CUDNN_DEPRECATED;
|
| 689 |
+
|
| 690 |
+
/*
|
| 691 |
+
* Performs Batch Normalization during Inference:
|
| 692 |
+
* y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
|
| 693 |
+
* with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
|
| 694 |
+
* according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
|
| 695 |
+
* above for notes on function arguments.
|
| 696 |
+
*/
|
| 697 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 698 |
+
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
|
| 699 |
+
cudnnBatchNormMode_t mode,
|
| 700 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 701 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 702 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 703 |
+
const void *x, /* NxCxHxW */
|
| 704 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 705 |
+
void *y, /* NxCxHxW */
|
| 706 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 707 |
+
const void *bnScale,
|
| 708 |
+
const void *bnBias,
|
| 709 |
+
const void *estimatedMean,
|
| 710 |
+
const void *estimatedVariance,
|
| 711 |
+
double epsilon);
|
| 712 |
+
|
| 713 |
+
typedef enum {
|
| 714 |
+
/* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
|
| 715 |
+
CUDNN_NORM_PER_ACTIVATION = 0,
|
| 716 |
+
|
| 717 |
+
/* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
|
| 718 |
+
CUDNN_NORM_PER_CHANNEL = 1,
|
| 719 |
+
} cudnnNormMode_t CUDNN_DEPRECATED;
|
| 720 |
+
|
| 721 |
+
typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;
|
| 722 |
+
|
| 723 |
+
/*
|
| 724 |
+
* Derives a tensor descriptor from layer data descriptor for Normalization
|
| 725 |
+
* scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
|
| 726 |
+
* normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
|
| 727 |
+
*/
|
| 728 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 729 |
+
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
|
| 730 |
+
cudnnTensorDescriptor_t derivedNormMeanVarDesc,
|
| 731 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 732 |
+
cudnnNormMode_t mode,
|
| 733 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 734 |
+
|
| 735 |
+
typedef enum {
|
| 736 |
+
CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
|
| 737 |
+
CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
|
| 738 |
+
CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
|
| 739 |
+
} cudnnNormOps_t CUDNN_DEPRECATED;
|
| 740 |
+
|
| 741 |
+
/*
|
| 742 |
+
* Performs Normalization during Inference:
|
| 743 |
+
* y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
|
| 744 |
+
* with normScale, normBias, runningMean, runningInvVariance tensors indexed
|
| 745 |
+
* according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
|
| 746 |
+
* above for notes on function arguments.
|
| 747 |
+
*/
|
| 748 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 749 |
+
cudnnNormalizationForwardInference(cudnnHandle_t handle,
|
| 750 |
+
cudnnNormMode_t mode,
|
| 751 |
+
cudnnNormOps_t normOps,
|
| 752 |
+
cudnnNormAlgo_t algo,
|
| 753 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 754 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 755 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 756 |
+
const void *x, /* NxCxHxW */
|
| 757 |
+
const cudnnTensorDescriptor_t normScaleBiasDesc,
|
| 758 |
+
const void *normScale,
|
| 759 |
+
const void *normBias,
|
| 760 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 761 |
+
const void *estimatedMean,
|
| 762 |
+
const void *estimatedVariance,
|
| 763 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 764 |
+
const void *z,
|
| 765 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 766 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 767 |
+
void *y, /* NxCxHxW */
|
| 768 |
+
double epsilon,
|
| 769 |
+
int groupCnt); /* Place hold for future work*/
|
| 770 |
+
|
| 771 |
+
/* APIs for spatial transformer network*/
|
| 772 |
+
typedef enum {
|
| 773 |
+
CUDNN_SAMPLER_BILINEAR = 0,
|
| 774 |
+
} cudnnSamplerType_t;
|
| 775 |
+
|
| 776 |
+
cudnnStatus_t CUDNNWINAPI
|
| 777 |
+
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);
|
| 778 |
+
|
| 779 |
+
cudnnStatus_t CUDNNWINAPI
|
| 780 |
+
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
|
| 781 |
+
cudnnSamplerType_t samplerType,
|
| 782 |
+
cudnnDataType_t dataType,
|
| 783 |
+
const int nbDims,
|
| 784 |
+
const int dimA[]);
|
| 785 |
+
|
| 786 |
+
cudnnStatus_t CUDNNWINAPI
|
| 787 |
+
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);
|
| 788 |
+
|
| 789 |
+
cudnnStatus_t CUDNNWINAPI
|
| 790 |
+
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
|
| 791 |
+
const cudnnSpatialTransformerDescriptor_t stDesc,
|
| 792 |
+
const void *theta,
|
| 793 |
+
void *grid);
|
| 794 |
+
|
| 795 |
+
cudnnStatus_t CUDNNWINAPI
|
| 796 |
+
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
|
| 797 |
+
cudnnSpatialTransformerDescriptor_t stDesc,
|
| 798 |
+
const void *alpha,
|
| 799 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 800 |
+
const void *x,
|
| 801 |
+
const void *grid,
|
| 802 |
+
const void *beta,
|
| 803 |
+
cudnnTensorDescriptor_t yDesc,
|
| 804 |
+
void *y);
|
| 805 |
+
|
| 806 |
+
typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;
|
| 807 |
+
|
| 808 |
+
cudnnStatus_t CUDNNWINAPI
|
| 809 |
+
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);
|
| 810 |
+
|
| 811 |
+
cudnnStatus_t CUDNNWINAPI
|
| 812 |
+
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);
|
| 813 |
+
|
| 814 |
+
/*helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
|
| 815 |
+
cudnnStatus_t CUDNNWINAPI
|
| 816 |
+
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);
|
| 817 |
+
|
| 818 |
+
/*helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
|
| 819 |
+
cudnnStatus_t CUDNNWINAPI
|
| 820 |
+
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);
|
| 821 |
+
|
| 822 |
+
cudnnStatus_t CUDNNWINAPI
|
| 823 |
+
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
|
| 824 |
+
cudnnHandle_t handle,
|
| 825 |
+
float dropout,
|
| 826 |
+
void *states,
|
| 827 |
+
size_t stateSizeInBytes,
|
| 828 |
+
unsigned long long seed);
|
| 829 |
+
|
| 830 |
+
/* Restores the dropout descriptor to a previously saved-off state */
|
| 831 |
+
cudnnStatus_t CUDNNWINAPI
|
| 832 |
+
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
|
| 833 |
+
cudnnHandle_t handle,
|
| 834 |
+
float dropout,
|
| 835 |
+
void *states,
|
| 836 |
+
size_t stateSizeInBytes,
|
| 837 |
+
unsigned long long seed);
|
| 838 |
+
|
| 839 |
+
cudnnStatus_t CUDNNWINAPI
|
| 840 |
+
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
|
| 841 |
+
cudnnHandle_t handle,
|
| 842 |
+
float *dropout,
|
| 843 |
+
void **states,
|
| 844 |
+
unsigned long long *seed);
|
| 845 |
+
|
| 846 |
+
cudnnStatus_t CUDNNWINAPI
|
| 847 |
+
cudnnDropoutForward(cudnnHandle_t handle,
|
| 848 |
+
const cudnnDropoutDescriptor_t dropoutDesc,
|
| 849 |
+
const cudnnTensorDescriptor_t xdesc,
|
| 850 |
+
const void *x,
|
| 851 |
+
const cudnnTensorDescriptor_t ydesc,
|
| 852 |
+
void *y,
|
| 853 |
+
void *reserveSpace,
|
| 854 |
+
size_t reserveSpaceSizeInBytes);
|
| 855 |
+
|
| 856 |
+
/* TODO: move these enums out to the appropriate submodule */
|
| 857 |
+
typedef enum {
|
| 858 |
+
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
|
| 859 |
+
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
|
| 860 |
+
CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
|
| 861 |
+
CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
|
| 862 |
+
CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
|
| 863 |
+
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
|
| 864 |
+
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
|
| 865 |
+
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
|
| 866 |
+
CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
|
| 867 |
+
} cudnnConvolutionFwdAlgo_t;
|
| 868 |
+
|
| 869 |
+
typedef enum {
|
| 870 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
|
| 871 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
|
| 872 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
|
| 873 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
|
| 874 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
|
| 875 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
|
| 876 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
|
| 877 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
|
| 878 |
+
} cudnnConvolutionBwdFilterAlgo_t;
|
| 879 |
+
|
| 880 |
+
typedef enum {
|
| 881 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
|
| 882 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
|
| 883 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
|
| 884 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
|
| 885 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
|
| 886 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
|
| 887 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
|
| 888 |
+
} cudnnConvolutionBwdDataAlgo_t;
|
| 889 |
+
|
| 890 |
+
typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;
|
| 891 |
+
|
| 892 |
+
/*
|
| 893 |
+
* \brief Cross-library version checker.
|
| 894 |
+
* This function is implemented differently in each sub-library. Each sublib
|
| 895 |
+
* checks whether its own version matches that of its dependencies.
|
| 896 |
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
| 897 |
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
| 898 |
+
*/
|
| 899 |
+
cudnnStatus_t CUDNNWINAPI
|
| 900 |
+
cudnnOpsVersionCheck(void);
|
| 901 |
+
|
| 902 |
+
/* Function to perform backward softmax */
|
| 903 |
+
cudnnStatus_t CUDNNWINAPI
|
| 904 |
+
cudnnSoftmaxBackward(cudnnHandle_t handle,
|
| 905 |
+
cudnnSoftmaxAlgorithm_t algo,
|
| 906 |
+
cudnnSoftmaxMode_t mode,
|
| 907 |
+
const void *alpha,
|
| 908 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 909 |
+
const void *y,
|
| 910 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 911 |
+
const void *dy,
|
| 912 |
+
const void *beta,
|
| 913 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 914 |
+
void *dx);
|
| 915 |
+
|
| 916 |
+
/* Function to perform backward pooling */
|
| 917 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 918 |
+
cudnnPoolingBackward(cudnnHandle_t handle,
|
| 919 |
+
const cudnnPoolingDescriptor_t poolingDesc,
|
| 920 |
+
const void *alpha,
|
| 921 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 922 |
+
const void *y,
|
| 923 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 924 |
+
const void *dy,
|
| 925 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 926 |
+
const void *x,
|
| 927 |
+
const void *beta,
|
| 928 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 929 |
+
void *dx);
|
| 930 |
+
|
| 931 |
+
/* Function to perform backward activation */
|
| 932 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 933 |
+
cudnnActivationBackward(cudnnHandle_t handle,
|
| 934 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 935 |
+
const void *alpha,
|
| 936 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 937 |
+
const void *y,
|
| 938 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 939 |
+
const void *dy,
|
| 940 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 941 |
+
const void *x,
|
| 942 |
+
const void *beta,
|
| 943 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 944 |
+
void *dx);
|
| 945 |
+
|
| 946 |
+
/* LRN cross-channel backward computation. Double parameters cast to tensor data type */
|
| 947 |
+
cudnnStatus_t CUDNNWINAPI
|
| 948 |
+
cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
|
| 949 |
+
cudnnLRNDescriptor_t normDesc,
|
| 950 |
+
cudnnLRNMode_t lrnMode,
|
| 951 |
+
const void *alpha,
|
| 952 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 953 |
+
const void *y,
|
| 954 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 955 |
+
const void *dy,
|
| 956 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 957 |
+
const void *x,
|
| 958 |
+
const void *beta,
|
| 959 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 960 |
+
void *dx);
|
| 961 |
+
|
| 962 |
+
cudnnStatus_t CUDNNWINAPI
|
| 963 |
+
cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
|
| 964 |
+
cudnnLRNDescriptor_t normDesc,
|
| 965 |
+
cudnnDivNormMode_t mode,
|
| 966 |
+
const void *alpha,
|
| 967 |
+
const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
|
| 968 |
+
const void *x,
|
| 969 |
+
const void *means, /* if NULL, means are assumed to be zero */
|
| 970 |
+
const void *dy,
|
| 971 |
+
void *temp,
|
| 972 |
+
void *temp2,
|
| 973 |
+
const void *beta,
|
| 974 |
+
const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
|
| 975 |
+
void *dx, /* output x differential */
|
| 976 |
+
void *dMeans); /* output means differential, can be NULL */
|
| 977 |
+
|
| 978 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 979 |
+
cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
|
| 980 |
+
cudnnBatchNormMode_t mode,
|
| 981 |
+
cudnnBatchNormOps_t bnOps,
|
| 982 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 983 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 984 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 985 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 986 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 987 |
+
size_t *sizeInBytes);
|
| 988 |
+
|
| 989 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 990 |
+
cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
|
| 991 |
+
cudnnBatchNormMode_t mode,
|
| 992 |
+
cudnnBatchNormOps_t bnOps,
|
| 993 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 994 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 995 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 996 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 997 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 998 |
+
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
|
| 999 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1000 |
+
size_t *sizeInBytes);
|
| 1001 |
+
|
| 1002 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1003 |
+
cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
|
| 1004 |
+
cudnnBatchNormMode_t mode,
|
| 1005 |
+
cudnnBatchNormOps_t bnOps,
|
| 1006 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1007 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1008 |
+
size_t *sizeInBytes);
|
| 1009 |
+
|
| 1010 |
+
/* Computes y = BN(x). Also accumulates moving averages of mean and inverse variances */
|
| 1011 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1012 |
+
cudnnBatchNormalizationForwardTraining(
|
| 1013 |
+
cudnnHandle_t handle,
|
| 1014 |
+
cudnnBatchNormMode_t mode,
|
| 1015 |
+
|
| 1016 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 1017 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 1018 |
+
|
| 1019 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1020 |
+
const void *x, /* NxCxHxW */
|
| 1021 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1022 |
+
void *y, /* NxCxHxW */
|
| 1023 |
+
|
| 1024 |
+
/* Shared desc for the next 6 tensors in the argument list.
|
| 1025 |
+
Data type to be set as follows:
|
| 1026 |
+
type = (typeOf(x) == double) ? double : float
|
| 1027 |
+
Dimensions for this descriptor depend on normalization mode
|
| 1028 |
+
- Spatial Normalization : tensors are expected to have dims 1xCx1x1
|
| 1029 |
+
(normalization is performed across NxHxW)
|
| 1030 |
+
- Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
|
| 1031 |
+
(normalization is performed across N) */
|
| 1032 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 1033 |
+
|
| 1034 |
+
/* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
|
| 1035 |
+
const void *bnScale,
|
| 1036 |
+
const void *bnBias,
|
| 1037 |
+
|
| 1038 |
+
/* MUST use factor=1 in the very first call of a complete training cycle.
|
| 1039 |
+
Use a factor=1/(1+n) at N-th call to the function to get
|
| 1040 |
+
Cumulative Moving Average (CMA) behavior
|
| 1041 |
+
CMA[n] = (x[1]+...+x[n])/n
|
| 1042 |
+
Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
|
| 1043 |
+
((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
|
| 1044 |
+
CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
|
| 1045 |
+
double exponentialAverageFactor,
|
| 1046 |
+
|
| 1047 |
+
/* Used in Training phase only.
|
| 1048 |
+
runningMean = newMean*factor + runningMean*(1-factor) */
|
| 1049 |
+
void *resultRunningMean,
|
| 1050 |
+
/* Output in training mode, input in inference. Is the moving average
|
| 1051 |
+
of variance[x] (factor is applied in the same way as for runningMean) */
|
| 1052 |
+
void *resultRunningVariance,
|
| 1053 |
+
|
| 1054 |
+
/* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
|
| 1055 |
+
double epsilon,
|
| 1056 |
+
|
| 1057 |
+
/* Optionally save intermediate results from the forward pass here
|
| 1058 |
+
- can be reused to speed up backward pass. NULL if unused */
|
| 1059 |
+
void *resultSaveMean,
|
| 1060 |
+
void *resultSaveInvVariance);
|
| 1061 |
+
|
| 1062 |
+
/* Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances */
|
| 1063 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1064 |
+
cudnnBatchNormalizationForwardTrainingEx(
|
| 1065 |
+
cudnnHandle_t handle,
|
| 1066 |
+
cudnnBatchNormMode_t mode,
|
| 1067 |
+
cudnnBatchNormOps_t bnOps,
|
| 1068 |
+
|
| 1069 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 1070 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 1071 |
+
|
| 1072 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1073 |
+
const void *xData,
|
| 1074 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 1075 |
+
const void *zData,
|
| 1076 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1077 |
+
void *yData,
|
| 1078 |
+
|
| 1079 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 1080 |
+
const void *bnScale,
|
| 1081 |
+
const void *bnBias,
|
| 1082 |
+
|
| 1083 |
+
double exponentialAverageFactor,
|
| 1084 |
+
void *resultRunningMean,
|
| 1085 |
+
void *resultRunningVariance,
|
| 1086 |
+
|
| 1087 |
+
/* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
|
| 1088 |
+
double epsilon,
|
| 1089 |
+
|
| 1090 |
+
/* Optionally save intermediate results from the forward pass here
|
| 1091 |
+
- can be reused to speed up backward pass. NULL if unused */
|
| 1092 |
+
void *resultSaveMean,
|
| 1093 |
+
void *resultSaveInvVariance,
|
| 1094 |
+
|
| 1095 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1096 |
+
void *workspace,
|
| 1097 |
+
size_t workSpaceSizeInBytes,
|
| 1098 |
+
void *reserveSpace,
|
| 1099 |
+
size_t reserveSpaceSizeInBytes);
|
| 1100 |
+
|
| 1101 |
+
/* Performs backward pass of Batch Normalization layer. Returns x gradient,
|
| 1102 |
+
* bnScale gradient and bnBias gradient */
|
| 1103 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1104 |
+
cudnnBatchNormalizationBackward(cudnnHandle_t handle,
|
| 1105 |
+
cudnnBatchNormMode_t mode,
|
| 1106 |
+
const void *alphaDataDiff,
|
| 1107 |
+
const void *betaDataDiff,
|
| 1108 |
+
const void *alphaParamDiff,
|
| 1109 |
+
const void *betaParamDiff,
|
| 1110 |
+
const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
|
| 1111 |
+
const void *x,
|
| 1112 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1113 |
+
const void *dy,
|
| 1114 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1115 |
+
void *dx,
|
| 1116 |
+
/* Shared tensor desc for the 4 tensors below */
|
| 1117 |
+
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
|
| 1118 |
+
const void *bnScale, /* bnBias doesn't affect backpropagation */
|
| 1119 |
+
/* scale and bias diff are not backpropagated below this layer */
|
| 1120 |
+
void *dBnScaleResult,
|
| 1121 |
+
void *dBnBiasResult,
|
| 1122 |
+
/* Same epsilon as forward pass */
|
| 1123 |
+
double epsilon,
|
| 1124 |
+
|
| 1125 |
+
/* Optionally cached intermediate results from
|
| 1126 |
+
forward pass */
|
| 1127 |
+
const void *savedMean,
|
| 1128 |
+
const void *savedInvVariance);
|
| 1129 |
+
|
| 1130 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1131 |
+
cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
|
| 1132 |
+
cudnnBatchNormMode_t mode,
|
| 1133 |
+
cudnnBatchNormOps_t bnOps,
|
| 1134 |
+
|
| 1135 |
+
const void *alphaDataDiff,
|
| 1136 |
+
const void *betaDataDiff,
|
| 1137 |
+
const void *alphaParamDiff,
|
| 1138 |
+
const void *betaParamDiff,
|
| 1139 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1140 |
+
const void *xData,
|
| 1141 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1142 |
+
const void *yData,
|
| 1143 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1144 |
+
const void *dyData,
|
| 1145 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 1146 |
+
void *dzData,
|
| 1147 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1148 |
+
void *dxData,
|
| 1149 |
+
|
| 1150 |
+
/* Shared tensor desc for the 4 tensors below */
|
| 1151 |
+
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
|
| 1152 |
+
const void *bnScaleData,
|
| 1153 |
+
const void *bnBiasData, /* needed if there is activation */
|
| 1154 |
+
void *dBnScaleData,
|
| 1155 |
+
void *dBnBiasData,
|
| 1156 |
+
double epsilon, /* Same epsilon as forward pass */
|
| 1157 |
+
|
| 1158 |
+
/* Optionally cached intermediate results from
|
| 1159 |
+
forward pass */
|
| 1160 |
+
const void *savedMean,
|
| 1161 |
+
const void *savedInvVariance,
|
| 1162 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1163 |
+
void *workSpace,
|
| 1164 |
+
size_t workSpaceSizeInBytes,
|
| 1165 |
+
void *reserveSpace,
|
| 1166 |
+
size_t reserveSpaceSizeInBytes);
|
| 1167 |
+
|
| 1168 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1169 |
+
cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
|
| 1170 |
+
cudnnNormMode_t mode,
|
| 1171 |
+
cudnnNormOps_t normOps,
|
| 1172 |
+
cudnnNormAlgo_t algo,
|
| 1173 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1174 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 1175 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1176 |
+
const cudnnTensorDescriptor_t normScaleBiasDesc,
|
| 1177 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1178 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1179 |
+
size_t *sizeInBytes,
|
| 1180 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1181 |
+
|
| 1182 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1183 |
+
cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
|
| 1184 |
+
cudnnNormMode_t mode,
|
| 1185 |
+
cudnnNormOps_t normOps,
|
| 1186 |
+
cudnnNormAlgo_t algo,
|
| 1187 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1188 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1189 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1190 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 1191 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1192 |
+
const cudnnTensorDescriptor_t dNormScaleBiasDesc,
|
| 1193 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1194 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1195 |
+
size_t *sizeInBytes,
|
| 1196 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1197 |
+
|
| 1198 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1199 |
+
cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
|
| 1200 |
+
cudnnNormMode_t mode,
|
| 1201 |
+
cudnnNormOps_t normOps,
|
| 1202 |
+
cudnnNormAlgo_t algo,
|
| 1203 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1204 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1205 |
+
size_t *sizeInBytes,
|
| 1206 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1207 |
+
|
| 1208 |
+
/* Computes y = relu(Norm(x) + z). Also accumulates moving averages of mean and inverse variances */
|
| 1209 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1210 |
+
cudnnNormalizationForwardTraining(cudnnHandle_t handle,
|
| 1211 |
+
cudnnNormMode_t mode,
|
| 1212 |
+
cudnnNormOps_t normOps,
|
| 1213 |
+
cudnnNormAlgo_t algo,
|
| 1214 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 1215 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 1216 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1217 |
+
const void *xData,
|
| 1218 |
+
const cudnnTensorDescriptor_t normScaleBiasDesc,
|
| 1219 |
+
const void *normScale,
|
| 1220 |
+
const void *normBias,
|
| 1221 |
+
double exponentialAverageFactor,
|
| 1222 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1223 |
+
void *resultRunningMean,
|
| 1224 |
+
void *resultRunningVariance,
|
| 1225 |
+
/* Has to be >= 0. Should be the same in forward and backward functions. */
|
| 1226 |
+
double epsilon,
|
| 1227 |
+
/* Optionally save intermediate results from the forward pass here
|
| 1228 |
+
- can be reused to speed up backward pass. NULL if unused */
|
| 1229 |
+
void *resultSaveMean,
|
| 1230 |
+
void *resultSaveInvVariance,
|
| 1231 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1232 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 1233 |
+
const void *zData,
|
| 1234 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1235 |
+
void *yData,
|
| 1236 |
+
void *workspace,
|
| 1237 |
+
size_t workSpaceSizeInBytes,
|
| 1238 |
+
void *reserveSpace,
|
| 1239 |
+
size_t reserveSpaceSizeInBytes,
|
| 1240 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1241 |
+
|
| 1242 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1243 |
+
cudnnNormalizationBackward(cudnnHandle_t handle,
|
| 1244 |
+
cudnnNormMode_t mode,
|
| 1245 |
+
cudnnNormOps_t normOps,
|
| 1246 |
+
cudnnNormAlgo_t algo,
|
| 1247 |
+
const void *alphaDataDiff,
|
| 1248 |
+
const void *betaDataDiff,
|
| 1249 |
+
const void *alphaParamDiff,
|
| 1250 |
+
const void *betaParamDiff,
|
| 1251 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1252 |
+
const void *xData,
|
| 1253 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1254 |
+
const void *yData,
|
| 1255 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1256 |
+
const void *dyData,
|
| 1257 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 1258 |
+
void *dzData,
|
| 1259 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1260 |
+
void *dxData,
|
| 1261 |
+
/* Shared tensor desc for the 4 tensors below */
|
| 1262 |
+
const cudnnTensorDescriptor_t dNormScaleBiasDesc,
|
| 1263 |
+
const void *normScaleData,
|
| 1264 |
+
const void *normBiasData, /* needed if there is activation */
|
| 1265 |
+
void *dNormScaleData,
|
| 1266 |
+
void *dNormBiasData,
|
| 1267 |
+
double epsilon, /* Same epsilon as forward pass */
|
| 1268 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1269 |
+
/* Optionally cached intermediate results from
|
| 1270 |
+
forward pass */
|
| 1271 |
+
const void *savedMean,
|
| 1272 |
+
const void *savedInvVariance,
|
| 1273 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1274 |
+
void *workSpace,
|
| 1275 |
+
size_t workSpaceSizeInBytes,
|
| 1276 |
+
void *reserveSpace,
|
| 1277 |
+
size_t reserveSpaceSizeInBytes,
|
| 1278 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1279 |
+
|
| 1280 |
+
cudnnStatus_t CUDNNWINAPI
|
| 1281 |
+
cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
|
| 1282 |
+
const cudnnSpatialTransformerDescriptor_t stDesc,
|
| 1283 |
+
const void *dgrid,
|
| 1284 |
+
void *dtheta);
|
| 1285 |
+
|
| 1286 |
+
cudnnStatus_t CUDNNWINAPI
|
| 1287 |
+
cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
|
| 1288 |
+
cudnnSpatialTransformerDescriptor_t stDesc,
|
| 1289 |
+
const void *alpha,
|
| 1290 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1291 |
+
const void *x,
|
| 1292 |
+
const void *beta,
|
| 1293 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1294 |
+
void *dx,
|
| 1295 |
+
const void *alphaDgrid,
|
| 1296 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1297 |
+
const void *dy,
|
| 1298 |
+
const void *grid,
|
| 1299 |
+
const void *betaDgrid,
|
| 1300 |
+
void *dgrid);
|
| 1301 |
+
|
| 1302 |
+
cudnnStatus_t CUDNNWINAPI
|
| 1303 |
+
cudnnDropoutBackward(cudnnHandle_t handle,
|
| 1304 |
+
const cudnnDropoutDescriptor_t dropoutDesc,
|
| 1305 |
+
const cudnnTensorDescriptor_t dydesc,
|
| 1306 |
+
const void *dy,
|
| 1307 |
+
const cudnnTensorDescriptor_t dxdesc,
|
| 1308 |
+
void *dx,
|
| 1309 |
+
void *reserveSpace,
|
| 1310 |
+
size_t reserveSpaceSizeInBytes);
|
| 1311 |
+
|
| 1312 |
+
#if defined(__cplusplus)
|
| 1313 |
+
}
|
| 1314 |
+
#endif
|
| 1315 |
+
|
| 1316 |
+
#endif /* CUDNN_OPS_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_ops_v9.h
ADDED
|
@@ -0,0 +1,1316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_ops : cuDNN's basic definitions and basic operations.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_OPS_H_)
|
| 55 |
+
#define CUDNN_OPS_H_
|
| 56 |
+
|
| 57 |
+
#include <stdint.h>
|
| 58 |
+
|
| 59 |
+
#include "cudnn_version.h"
|
| 60 |
+
#include "cudnn_graph.h"
|
| 61 |
+
|
| 62 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 63 |
+
#define CUDNN_OPS_MAJOR 9
|
| 64 |
+
#define CUDNN_OPS_MINOR 10
|
| 65 |
+
#define CUDNN_OPS_PATCH 2
|
| 66 |
+
|
| 67 |
+
#if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
|
| 68 |
+
#error Version mismatch in cuDNN OPS INFER!!!
|
| 69 |
+
#endif
|
| 70 |
+
|
| 71 |
+
#if defined(__cplusplus)
|
| 72 |
+
extern "C" {
|
| 73 |
+
#endif
|
| 74 |
+
|
| 75 |
+
/* Data structures to represent Image/Filter and the Neural Network Layer */
|
| 76 |
+
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
|
| 77 |
+
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
|
| 78 |
+
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
|
| 79 |
+
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
|
| 80 |
+
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
|
| 81 |
+
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
|
| 82 |
+
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
|
| 83 |
+
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
|
| 84 |
+
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
|
| 85 |
+
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;
|
| 86 |
+
/*
|
| 87 |
+
* CUDNN Determinism
|
| 88 |
+
*/
|
| 89 |
+
typedef enum {
|
| 90 |
+
CUDNN_NON_DETERMINISTIC = 0,
|
| 91 |
+
CUDNN_DETERMINISTIC = 1,
|
| 92 |
+
} cudnnDeterminism_t;
|
| 93 |
+
|
| 94 |
+
/* Create an instance of a generic Tensor descriptor */
|
| 95 |
+
cudnnStatus_t CUDNNWINAPI
|
| 96 |
+
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
|
| 97 |
+
|
| 98 |
+
cudnnStatus_t CUDNNWINAPI
|
| 99 |
+
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
|
| 100 |
+
cudnnTensorFormat_t format,
|
| 101 |
+
cudnnDataType_t dataType, /* image data type */
|
| 102 |
+
int n, /* number of inputs (batch size) */
|
| 103 |
+
int c, /* number of input feature maps */
|
| 104 |
+
int h, /* height of input section */
|
| 105 |
+
int w); /* width of input section */
|
| 106 |
+
|
| 107 |
+
cudnnStatus_t CUDNNWINAPI
|
| 108 |
+
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
|
| 109 |
+
cudnnDataType_t dataType, /* image data type */
|
| 110 |
+
int n, /* number of inputs (batch size) */
|
| 111 |
+
int c, /* number of input feature maps */
|
| 112 |
+
int h, /* height of input section */
|
| 113 |
+
int w, /* width of input section */
|
| 114 |
+
int nStride,
|
| 115 |
+
int cStride,
|
| 116 |
+
int hStride,
|
| 117 |
+
int wStride);
|
| 118 |
+
|
| 119 |
+
cudnnStatus_t CUDNNWINAPI
|
| 120 |
+
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
|
| 121 |
+
cudnnDataType_t *dataType, /* image data type */
|
| 122 |
+
int *n, /* number of inputs (batch size) */
|
| 123 |
+
int *c, /* number of input feature maps */
|
| 124 |
+
int *h, /* height of input section */
|
| 125 |
+
int *w, /* width of input section */
|
| 126 |
+
int *nStride,
|
| 127 |
+
int *cStride,
|
| 128 |
+
int *hStride,
|
| 129 |
+
int *wStride);
|
| 130 |
+
|
| 131 |
+
cudnnStatus_t CUDNNWINAPI
|
| 132 |
+
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
|
| 133 |
+
cudnnDataType_t dataType,
|
| 134 |
+
int nbDims,
|
| 135 |
+
const int dimA[],
|
| 136 |
+
const int strideA[]);
|
| 137 |
+
|
| 138 |
+
cudnnStatus_t CUDNNWINAPI
|
| 139 |
+
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
|
| 140 |
+
cudnnTensorFormat_t format,
|
| 141 |
+
cudnnDataType_t dataType,
|
| 142 |
+
int nbDims,
|
| 143 |
+
const int dimA[]);
|
| 144 |
+
|
| 145 |
+
cudnnStatus_t CUDNNWINAPI
|
| 146 |
+
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
|
| 147 |
+
int nbDimsRequested,
|
| 148 |
+
cudnnDataType_t *dataType,
|
| 149 |
+
int *nbDims,
|
| 150 |
+
int dimA[],
|
| 151 |
+
int strideA[]);
|
| 152 |
+
|
| 153 |
+
cudnnStatus_t CUDNNWINAPI
|
| 154 |
+
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);
|
| 155 |
+
|
| 156 |
+
/* PixelOffset( n, c, h, w ) = n *input_stride + c * feature_stride + h * h_stride + w * w_stride
|
| 157 |
+
|
| 158 |
+
1)Example of all images in row major order one batch of features after the other (with an optional padding on row)
|
| 159 |
+
input_stride : c x h x h_stride
|
| 160 |
+
feature_stride : h x h_stride
|
| 161 |
+
h_stride : >= w ( h_stride = w if no padding)
|
| 162 |
+
w_stride : 1
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
2)Example of all images in row major with features maps interleaved
|
| 166 |
+
input_stride : c x h x h_stride
|
| 167 |
+
feature_stride : 1
|
| 168 |
+
h_stride : w x c
|
| 169 |
+
w_stride : c
|
| 170 |
+
|
| 171 |
+
3)Example of all images in column major order one batch of features after the other (with optional padding on column)
|
| 172 |
+
input_stride : c x w x w_stride
|
| 173 |
+
feature_stride : w x w_stride
|
| 174 |
+
h_stride : 1
|
| 175 |
+
w_stride : >= h
|
| 176 |
+
|
| 177 |
+
*/
|
| 178 |
+
|
| 179 |
+
/* Destroy an instance of Tensor4d descriptor */
|
| 180 |
+
cudnnStatus_t CUDNNWINAPI
|
| 181 |
+
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
|
| 182 |
+
|
| 183 |
+
/* Fold/unfold transforms */
|
| 184 |
+
typedef enum {
|
| 185 |
+
CUDNN_TRANSFORM_FOLD = 0U,
|
| 186 |
+
CUDNN_TRANSFORM_UNFOLD = 1U,
|
| 187 |
+
} cudnnFoldingDirection_t;
|
| 188 |
+
|
| 189 |
+
/** Create a destination descriptor for cudnnTransformTensor */
|
| 190 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 191 |
+
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
|
| 192 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 193 |
+
cudnnTensorDescriptor_t destDesc,
|
| 194 |
+
size_t *destSizeInBytes);
|
| 195 |
+
|
| 196 |
+
/** Create an empty tensor transform descriptor */
|
| 197 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 198 |
+
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);
|
| 199 |
+
|
| 200 |
+
/** Initialize a previously created tensor transform descriptor. */
|
| 201 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 202 |
+
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
|
| 203 |
+
const uint32_t nbDims,
|
| 204 |
+
const cudnnTensorFormat_t destFormat,
|
| 205 |
+
const int32_t padBeforeA[],
|
| 206 |
+
const int32_t padAfterA[],
|
| 207 |
+
const uint32_t foldA[],
|
| 208 |
+
const cudnnFoldingDirection_t direction);
|
| 209 |
+
|
| 210 |
+
/**
|
| 211 |
+
* Retrieves the values stored in a previously initialized tensor transform
|
| 212 |
+
* descriptor.
|
| 213 |
+
*/
|
| 214 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 215 |
+
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
|
| 216 |
+
uint32_t nbDimsRequested,
|
| 217 |
+
cudnnTensorFormat_t *destFormat,
|
| 218 |
+
int32_t padBeforeA[],
|
| 219 |
+
int32_t padAfterA[],
|
| 220 |
+
uint32_t foldA[],
|
| 221 |
+
cudnnFoldingDirection_t *direction);
|
| 222 |
+
|
| 223 |
+
/**
|
| 224 |
+
* Destroys a previously created tensor transform descriptor.
|
| 225 |
+
*/
|
| 226 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 227 |
+
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
|
| 228 |
+
|
| 229 |
+
/* Tensor layout conversion helper (y = alpha * x + beta * y) */
|
| 230 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 231 |
+
cudnnTransformTensor(cudnnHandle_t handle,
|
| 232 |
+
const void *alpha,
|
| 233 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 234 |
+
const void *x,
|
| 235 |
+
const void *beta,
|
| 236 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 237 |
+
void *y);
|
| 238 |
+
|
| 239 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 240 |
+
cudnnTransformTensorEx(cudnnHandle_t handle,
|
| 241 |
+
const cudnnTensorTransformDescriptor_t transDesc,
|
| 242 |
+
const void *alpha,
|
| 243 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 244 |
+
const void *srcData,
|
| 245 |
+
const void *beta,
|
| 246 |
+
const cudnnTensorDescriptor_t destDesc,
|
| 247 |
+
void *destData);
|
| 248 |
+
|
| 249 |
+
/* Tensor Bias addition : C = alpha * A + beta * C */
|
| 250 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 251 |
+
cudnnAddTensor(cudnnHandle_t handle,
|
| 252 |
+
const void *alpha,
|
| 253 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 254 |
+
const void *A,
|
| 255 |
+
const void *beta,
|
| 256 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 257 |
+
void *C);
|
| 258 |
+
|
| 259 |
+
/*
|
| 260 |
+
* CUDNN OpTensor op type
|
| 261 |
+
*/
|
| 262 |
+
typedef enum {
|
| 263 |
+
CUDNN_OP_TENSOR_ADD = 0,
|
| 264 |
+
CUDNN_OP_TENSOR_MUL = 1,
|
| 265 |
+
CUDNN_OP_TENSOR_MIN = 2,
|
| 266 |
+
CUDNN_OP_TENSOR_MAX = 3,
|
| 267 |
+
CUDNN_OP_TENSOR_SQRT = 4,
|
| 268 |
+
CUDNN_OP_TENSOR_NOT = 5,
|
| 269 |
+
} cudnnOpTensorOp_t;
|
| 270 |
+
|
| 271 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 272 |
+
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);
|
| 273 |
+
|
| 274 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 275 |
+
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
|
| 276 |
+
cudnnOpTensorOp_t opTensorOp,
|
| 277 |
+
cudnnDataType_t opTensorCompType,
|
| 278 |
+
cudnnNanPropagation_t opTensorNanOpt);
|
| 279 |
+
|
| 280 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 281 |
+
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
|
| 282 |
+
cudnnOpTensorOp_t *opTensorOp,
|
| 283 |
+
cudnnDataType_t *opTensorCompType,
|
| 284 |
+
cudnnNanPropagation_t *opTensorNanOpt);
|
| 285 |
+
|
| 286 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 287 |
+
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
|
| 288 |
+
|
| 289 |
+
/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
|
| 290 |
+
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
|
| 291 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 292 |
+
cudnnOpTensor(cudnnHandle_t handle,
|
| 293 |
+
const cudnnOpTensorDescriptor_t opTensorDesc,
|
| 294 |
+
const void *alpha1,
|
| 295 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 296 |
+
const void *A,
|
| 297 |
+
const void *alpha2,
|
| 298 |
+
const cudnnTensorDescriptor_t bDesc,
|
| 299 |
+
const void *B,
|
| 300 |
+
const void *beta,
|
| 301 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 302 |
+
void *C);
|
| 303 |
+
|
| 304 |
+
/*
|
| 305 |
+
* CUDNN ReduceTensor indices type
|
| 306 |
+
*/
|
| 307 |
+
typedef enum {
|
| 308 |
+
CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
|
| 309 |
+
CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
|
| 310 |
+
} cudnnReduceTensorIndices_t CUDNN_DEPRECATED;
|
| 311 |
+
|
| 312 |
+
/*
|
| 313 |
+
* CUDNN tensor indices type size (all unsigned)
|
| 314 |
+
* Currently not supported, default is 32 bit unsigned.
|
| 315 |
+
*/
|
| 316 |
+
typedef enum {
|
| 317 |
+
CUDNN_32BIT_INDICES = 0,
|
| 318 |
+
CUDNN_64BIT_INDICES = 1,
|
| 319 |
+
CUDNN_16BIT_INDICES = 2,
|
| 320 |
+
CUDNN_8BIT_INDICES = 3,
|
| 321 |
+
} cudnnIndicesType_t CUDNN_DEPRECATED;
|
| 322 |
+
|
| 323 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 324 |
+
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);
|
| 325 |
+
|
| 326 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 327 |
+
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 328 |
+
cudnnReduceTensorOp_t reduceTensorOp,
|
| 329 |
+
cudnnDataType_t reduceTensorCompType,
|
| 330 |
+
cudnnNanPropagation_t reduceTensorNanOpt,
|
| 331 |
+
cudnnReduceTensorIndices_t reduceTensorIndices,
|
| 332 |
+
cudnnIndicesType_t reduceTensorIndicesType);
|
| 333 |
+
|
| 334 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 336 |
+
cudnnReduceTensorOp_t *reduceTensorOp,
|
| 337 |
+
cudnnDataType_t *reduceTensorCompType,
|
| 338 |
+
cudnnNanPropagation_t *reduceTensorNanOpt,
|
| 339 |
+
cudnnReduceTensorIndices_t *reduceTensorIndices,
|
| 340 |
+
cudnnIndicesType_t *reduceTensorIndicesType);
|
| 341 |
+
|
| 342 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 343 |
+
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
|
| 344 |
+
|
| 345 |
+
/* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
|
| 346 |
+
* output tensors */
|
| 347 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 348 |
+
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
|
| 349 |
+
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 350 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 351 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 352 |
+
size_t *sizeInBytes);
|
| 353 |
+
|
| 354 |
+
/* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
|
| 355 |
+
* tensors */
|
| 356 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 357 |
+
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
|
| 358 |
+
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 359 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 360 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 361 |
+
size_t *sizeInBytes);
|
| 362 |
+
|
| 363 |
+
/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
|
| 364 |
+
/* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
|
| 365 |
+
/* The indices space is ignored for reduce ops other than min or max. */
|
| 366 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 367 |
+
cudnnReduceTensor(cudnnHandle_t handle,
|
| 368 |
+
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
|
| 369 |
+
void *indices,
|
| 370 |
+
size_t indicesSizeInBytes,
|
| 371 |
+
void *workspace,
|
| 372 |
+
size_t workspaceSizeInBytes,
|
| 373 |
+
const void *alpha,
|
| 374 |
+
const cudnnTensorDescriptor_t aDesc,
|
| 375 |
+
const void *A,
|
| 376 |
+
const void *beta,
|
| 377 |
+
const cudnnTensorDescriptor_t cDesc,
|
| 378 |
+
void *C);
|
| 379 |
+
|
| 380 |
+
/* Set all values of a tensor to a given value : y[i] = value[0] */
|
| 381 |
+
cudnnStatus_t CUDNNWINAPI
|
| 382 |
+
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);
|
| 383 |
+
|
| 384 |
+
/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
|
| 385 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 386 |
+
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);
|
| 387 |
+
|
| 388 |
+
/* Create an instance of FilterStruct */
|
| 389 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 390 |
+
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
|
| 391 |
+
|
| 392 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 393 |
+
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
|
| 394 |
+
cudnnDataType_t dataType, /* image data type */
|
| 395 |
+
cudnnTensorFormat_t format,
|
| 396 |
+
int k, /* number of output feature maps */
|
| 397 |
+
int c, /* number of input feature maps */
|
| 398 |
+
int h, /* height of each input filter */
|
| 399 |
+
int w); /* width of each input filter */
|
| 400 |
+
|
| 401 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 402 |
+
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
|
| 403 |
+
cudnnDataType_t *dataType, /* image data type */
|
| 404 |
+
cudnnTensorFormat_t *format,
|
| 405 |
+
int *k, /* number of output feature maps */
|
| 406 |
+
int *c, /* number of input feature maps */
|
| 407 |
+
int *h, /* height of each input filter */
|
| 408 |
+
int *w); /* width of each input filter */
|
| 409 |
+
|
| 410 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 411 |
+
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
|
| 412 |
+
cudnnDataType_t dataType, /* image data type */
|
| 413 |
+
cudnnTensorFormat_t format,
|
| 414 |
+
int nbDims,
|
| 415 |
+
const int filterDimA[]);
|
| 416 |
+
|
| 417 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 418 |
+
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
|
| 419 |
+
int nbDimsRequested,
|
| 420 |
+
cudnnDataType_t *dataType, /* image data type */
|
| 421 |
+
cudnnTensorFormat_t *format,
|
| 422 |
+
int *nbDims,
|
| 423 |
+
int filterDimA[]);
|
| 424 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 425 |
+
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);
|
| 426 |
+
|
| 427 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 428 |
+
cudnnTransformFilter(cudnnHandle_t handle,
|
| 429 |
+
const cudnnTensorTransformDescriptor_t transDesc,
|
| 430 |
+
const void *alpha,
|
| 431 |
+
const cudnnFilterDescriptor_t srcDesc,
|
| 432 |
+
const void *srcData,
|
| 433 |
+
const void *beta,
|
| 434 |
+
const cudnnFilterDescriptor_t destDesc,
|
| 435 |
+
void *destData);
|
| 436 |
+
|
| 437 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 438 |
+
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
|
| 439 |
+
|
| 440 |
+
/*
|
| 441 |
+
* softmax algorithm
|
| 442 |
+
*/
|
| 443 |
+
typedef enum {
|
| 444 |
+
CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
|
| 445 |
+
CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
|
| 446 |
+
CUDNN_SOFTMAX_LOG = 2
|
| 447 |
+
} cudnnSoftmaxAlgorithm_t;
|
| 448 |
+
|
| 449 |
+
typedef enum {
|
| 450 |
+
CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
|
| 451 |
+
CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
|
| 452 |
+
} cudnnSoftmaxMode_t;
|
| 453 |
+
|
| 454 |
+
/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 455 |
+
|
| 456 |
+
/* Function to perform forward softmax */
|
| 457 |
+
cudnnStatus_t CUDNNWINAPI
|
| 458 |
+
cudnnSoftmaxForward(cudnnHandle_t handle,
|
| 459 |
+
cudnnSoftmaxAlgorithm_t algo,
|
| 460 |
+
cudnnSoftmaxMode_t mode,
|
| 461 |
+
const void *alpha,
|
| 462 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 463 |
+
const void *x,
|
| 464 |
+
const void *beta,
|
| 465 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 466 |
+
void *y);
|
| 467 |
+
|
| 468 |
+
/*
|
| 469 |
+
* pooling mode
|
| 470 |
+
*/
|
| 471 |
+
typedef enum {
|
| 472 |
+
CUDNN_POOLING_MAX = 0,
|
| 473 |
+
CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
|
| 474 |
+
CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
|
| 475 |
+
CUDNN_POOLING_MAX_DETERMINISTIC = 3
|
| 476 |
+
} cudnnPoolingMode_t CUDNN_DEPRECATED;
|
| 477 |
+
|
| 478 |
+
/* Create an instance of pooling descriptor */
|
| 479 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 480 |
+
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
|
| 481 |
+
|
| 482 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 483 |
+
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
|
| 484 |
+
cudnnPoolingMode_t mode,
|
| 485 |
+
cudnnNanPropagation_t maxpoolingNanOpt,
|
| 486 |
+
int windowHeight,
|
| 487 |
+
int windowWidth,
|
| 488 |
+
int verticalPadding,
|
| 489 |
+
int horizontalPadding,
|
| 490 |
+
int verticalStride,
|
| 491 |
+
int horizontalStride);
|
| 492 |
+
|
| 493 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 494 |
+
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
|
| 495 |
+
cudnnPoolingMode_t *mode,
|
| 496 |
+
cudnnNanPropagation_t *maxpoolingNanOpt,
|
| 497 |
+
int *windowHeight,
|
| 498 |
+
int *windowWidth,
|
| 499 |
+
int *verticalPadding,
|
| 500 |
+
int *horizontalPadding,
|
| 501 |
+
int *verticalStride,
|
| 502 |
+
int *horizontalStride);
|
| 503 |
+
|
| 504 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 505 |
+
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
|
| 506 |
+
const cudnnPoolingMode_t mode,
|
| 507 |
+
const cudnnNanPropagation_t maxpoolingNanOpt,
|
| 508 |
+
int nbDims,
|
| 509 |
+
const int windowDimA[],
|
| 510 |
+
const int paddingA[],
|
| 511 |
+
const int strideA[]);
|
| 512 |
+
|
| 513 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 514 |
+
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
|
| 515 |
+
int nbDimsRequested,
|
| 516 |
+
cudnnPoolingMode_t *mode,
|
| 517 |
+
cudnnNanPropagation_t *maxpoolingNanOpt,
|
| 518 |
+
int *nbDims,
|
| 519 |
+
int windowDimA[],
|
| 520 |
+
int paddingA[],
|
| 521 |
+
int strideA[]);
|
| 522 |
+
|
| 523 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 524 |
+
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
|
| 525 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 526 |
+
int nbDims,
|
| 527 |
+
int outputTensorDimA[]);
|
| 528 |
+
|
| 529 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 530 |
+
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
|
| 531 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 532 |
+
int *n,
|
| 533 |
+
int *c,
|
| 534 |
+
int *h,
|
| 535 |
+
int *w);
|
| 536 |
+
|
| 537 |
+
/* Destroy an instance of pooling descriptor */
|
| 538 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 539 |
+
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
|
| 540 |
+
|
| 541 |
+
/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 542 |
+
|
| 543 |
+
/* Function to perform forward pooling */
|
| 544 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 545 |
+
cudnnPoolingForward(cudnnHandle_t handle,
|
| 546 |
+
const cudnnPoolingDescriptor_t poolingDesc,
|
| 547 |
+
const void *alpha,
|
| 548 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 549 |
+
const void *x,
|
| 550 |
+
const void *beta,
|
| 551 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 552 |
+
void *y);
|
| 553 |
+
|
| 554 |
+
/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 555 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 556 |
+
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);
|
| 557 |
+
|
| 558 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 559 |
+
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
|
| 560 |
+
cudnnActivationMode_t mode,
|
| 561 |
+
cudnnNanPropagation_t reluNanOpt,
|
| 562 |
+
double coef); /* ceiling for clipped RELU, alpha for ELU */
|
| 563 |
+
|
| 564 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 565 |
+
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
|
| 566 |
+
cudnnActivationMode_t *mode,
|
| 567 |
+
cudnnNanPropagation_t *reluNanOpt,
|
| 568 |
+
double *coef); /* ceiling for clipped RELU, alpha for ELU */
|
| 569 |
+
|
| 570 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 571 |
+
cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);
|
| 572 |
+
|
| 573 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 574 |
+
cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);
|
| 575 |
+
|
| 576 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 577 |
+
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
|
| 578 |
+
|
| 579 |
+
/* Function to perform forward activation */
|
| 580 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 581 |
+
cudnnActivationForward(cudnnHandle_t handle,
|
| 582 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 583 |
+
const void *alpha,
|
| 584 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 585 |
+
const void *x,
|
| 586 |
+
const void *beta,
|
| 587 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 588 |
+
void *y);
|
| 589 |
+
|
| 590 |
+
/*
|
| 591 |
+
* Create an instance of LRN (Local Response Normalization) descriptor
|
| 592 |
+
* Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
|
| 593 |
+
*/
|
| 594 |
+
cudnnStatus_t CUDNNWINAPI
|
| 595 |
+
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);
|
| 596 |
+
|
| 597 |
+
#define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
|
| 598 |
+
#define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
|
| 599 |
+
#define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
|
| 600 |
+
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */
|
| 601 |
+
|
| 602 |
+
/* LRN layer mode */
|
| 603 |
+
typedef enum {
|
| 604 |
+
CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
|
| 605 |
+
} cudnnLRNMode_t;
|
| 606 |
+
|
| 607 |
+
/*
|
| 608 |
+
* Uses a window [center-lookBehind, center+lookAhead], where
|
| 609 |
+
* lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
|
| 610 |
+
* Values of double parameters cast to tensor data type.
|
| 611 |
+
*/
|
| 612 |
+
cudnnStatus_t CUDNNWINAPI
|
| 613 |
+
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
|
| 614 |
+
/*
|
| 615 |
+
* Retrieve the settings currently stored in an LRN layer descriptor
|
| 616 |
+
* Any of the provided pointers can be NULL (no corresponding value will be returned)
|
| 617 |
+
*/
|
| 618 |
+
cudnnStatus_t CUDNNWINAPI
|
| 619 |
+
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);
|
| 620 |
+
|
| 621 |
+
/* Destroy an instance of LRN descriptor */
|
| 622 |
+
cudnnStatus_t CUDNNWINAPI
|
| 623 |
+
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
|
| 624 |
+
|
| 625 |
+
/* LRN functions: output = alpha * normalize(x) + beta * old_y */
|
| 626 |
+
|
| 627 |
+
/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
|
| 628 |
+
cudnnStatus_t CUDNNWINAPI
|
| 629 |
+
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
|
| 630 |
+
cudnnLRNDescriptor_t normDesc,
|
| 631 |
+
cudnnLRNMode_t lrnMode,
|
| 632 |
+
const void *alpha,
|
| 633 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 634 |
+
const void *x,
|
| 635 |
+
const void *beta,
|
| 636 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 637 |
+
void *y);
|
| 638 |
+
|
| 639 |
+
typedef enum {
|
| 640 |
+
CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
|
| 641 |
+
} cudnnDivNormMode_t;
|
| 642 |
+
|
| 643 |
+
/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
|
| 644 |
+
cudnnStatus_t CUDNNWINAPI
|
| 645 |
+
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
|
| 646 |
+
cudnnLRNDescriptor_t normDesc,
|
| 647 |
+
cudnnDivNormMode_t mode,
|
| 648 |
+
const void *alpha,
|
| 649 |
+
const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
|
| 650 |
+
const void *x,
|
| 651 |
+
const void *means, /* if NULL, means are assumed to be zero */
|
| 652 |
+
void *temp,
|
| 653 |
+
void *temp2,
|
| 654 |
+
const void *beta,
|
| 655 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 656 |
+
void *y);
|
| 657 |
+
|
| 658 |
+
typedef enum {
|
| 659 |
+
/* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
|
| 660 |
+
CUDNN_BATCHNORM_PER_ACTIVATION = 0,
|
| 661 |
+
|
| 662 |
+
/* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
|
| 663 |
+
CUDNN_BATCHNORM_SPATIAL = 1,
|
| 664 |
+
|
| 665 |
+
/*
|
| 666 |
+
* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
|
| 667 |
+
* May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
|
| 668 |
+
*/
|
| 669 |
+
CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
|
| 670 |
+
} cudnnBatchNormMode_t CUDNN_DEPRECATED;
|
| 671 |
+
|
| 672 |
+
#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */
|
| 673 |
+
|
| 674 |
+
/*
|
| 675 |
+
* Derives a tensor descriptor from layer data descriptor for BatchNormalization
|
| 676 |
+
* scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
|
| 677 |
+
* bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
|
| 678 |
+
*/
|
| 679 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 680 |
+
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
|
| 681 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 682 |
+
cudnnBatchNormMode_t mode);
|
| 683 |
+
|
| 684 |
+
typedef enum {
|
| 685 |
+
CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
|
| 686 |
+
CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
|
| 687 |
+
CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
|
| 688 |
+
} cudnnBatchNormOps_t CUDNN_DEPRECATED;
|
| 689 |
+
|
| 690 |
+
/*
|
| 691 |
+
* Performs Batch Normalization during Inference:
|
| 692 |
+
* y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
|
| 693 |
+
* with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
|
| 694 |
+
* according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
|
| 695 |
+
* above for notes on function arguments.
|
| 696 |
+
*/
|
| 697 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 698 |
+
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
|
| 699 |
+
cudnnBatchNormMode_t mode,
|
| 700 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 701 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 702 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 703 |
+
const void *x, /* NxCxHxW */
|
| 704 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 705 |
+
void *y, /* NxCxHxW */
|
| 706 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 707 |
+
const void *bnScale,
|
| 708 |
+
const void *bnBias,
|
| 709 |
+
const void *estimatedMean,
|
| 710 |
+
const void *estimatedVariance,
|
| 711 |
+
double epsilon);
|
| 712 |
+
|
| 713 |
+
typedef enum {
|
| 714 |
+
/* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
|
| 715 |
+
CUDNN_NORM_PER_ACTIVATION = 0,
|
| 716 |
+
|
| 717 |
+
/* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
|
| 718 |
+
CUDNN_NORM_PER_CHANNEL = 1,
|
| 719 |
+
} cudnnNormMode_t CUDNN_DEPRECATED;
|
| 720 |
+
|
| 721 |
+
typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;
|
| 722 |
+
|
| 723 |
+
/*
|
| 724 |
+
* Derives a tensor descriptor from layer data descriptor for Normalization
|
| 725 |
+
* scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
|
| 726 |
+
* normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
|
| 727 |
+
*/
|
| 728 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 729 |
+
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
|
| 730 |
+
cudnnTensorDescriptor_t derivedNormMeanVarDesc,
|
| 731 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 732 |
+
cudnnNormMode_t mode,
|
| 733 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 734 |
+
|
| 735 |
+
typedef enum {
|
| 736 |
+
CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
|
| 737 |
+
CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
|
| 738 |
+
CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
|
| 739 |
+
} cudnnNormOps_t CUDNN_DEPRECATED;
|
| 740 |
+
|
| 741 |
+
/*
|
| 742 |
+
* Performs Normalization during Inference:
|
| 743 |
+
* y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
|
| 744 |
+
* with normScale, normBias, runningMean, runningInvVariance tensors indexed
|
| 745 |
+
* according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
|
| 746 |
+
* above for notes on function arguments.
|
| 747 |
+
*/
|
| 748 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 749 |
+
cudnnNormalizationForwardInference(cudnnHandle_t handle,
|
| 750 |
+
cudnnNormMode_t mode,
|
| 751 |
+
cudnnNormOps_t normOps,
|
| 752 |
+
cudnnNormAlgo_t algo,
|
| 753 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 754 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 755 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 756 |
+
const void *x, /* NxCxHxW */
|
| 757 |
+
const cudnnTensorDescriptor_t normScaleBiasDesc,
|
| 758 |
+
const void *normScale,
|
| 759 |
+
const void *normBias,
|
| 760 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 761 |
+
const void *estimatedMean,
|
| 762 |
+
const void *estimatedVariance,
|
| 763 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 764 |
+
const void *z,
|
| 765 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 766 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 767 |
+
void *y, /* NxCxHxW */
|
| 768 |
+
double epsilon,
|
| 769 |
+
int groupCnt); /* Place hold for future work*/
|
| 770 |
+
|
| 771 |
+
/* APIs for spatial transformer network*/
|
| 772 |
+
typedef enum {
|
| 773 |
+
CUDNN_SAMPLER_BILINEAR = 0,
|
| 774 |
+
} cudnnSamplerType_t;
|
| 775 |
+
|
| 776 |
+
cudnnStatus_t CUDNNWINAPI
|
| 777 |
+
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);
|
| 778 |
+
|
| 779 |
+
cudnnStatus_t CUDNNWINAPI
|
| 780 |
+
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
|
| 781 |
+
cudnnSamplerType_t samplerType,
|
| 782 |
+
cudnnDataType_t dataType,
|
| 783 |
+
const int nbDims,
|
| 784 |
+
const int dimA[]);
|
| 785 |
+
|
| 786 |
+
cudnnStatus_t CUDNNWINAPI
|
| 787 |
+
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);
|
| 788 |
+
|
| 789 |
+
cudnnStatus_t CUDNNWINAPI
|
| 790 |
+
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
|
| 791 |
+
const cudnnSpatialTransformerDescriptor_t stDesc,
|
| 792 |
+
const void *theta,
|
| 793 |
+
void *grid);
|
| 794 |
+
|
| 795 |
+
cudnnStatus_t CUDNNWINAPI
|
| 796 |
+
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
|
| 797 |
+
cudnnSpatialTransformerDescriptor_t stDesc,
|
| 798 |
+
const void *alpha,
|
| 799 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 800 |
+
const void *x,
|
| 801 |
+
const void *grid,
|
| 802 |
+
const void *beta,
|
| 803 |
+
cudnnTensorDescriptor_t yDesc,
|
| 804 |
+
void *y);
|
| 805 |
+
|
| 806 |
+
typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;
|
| 807 |
+
|
| 808 |
+
cudnnStatus_t CUDNNWINAPI
|
| 809 |
+
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);
|
| 810 |
+
|
| 811 |
+
cudnnStatus_t CUDNNWINAPI
|
| 812 |
+
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);
|
| 813 |
+
|
| 814 |
+
/*helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
|
| 815 |
+
cudnnStatus_t CUDNNWINAPI
|
| 816 |
+
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);
|
| 817 |
+
|
| 818 |
+
/*helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
|
| 819 |
+
cudnnStatus_t CUDNNWINAPI
|
| 820 |
+
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);
|
| 821 |
+
|
| 822 |
+
cudnnStatus_t CUDNNWINAPI
|
| 823 |
+
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
|
| 824 |
+
cudnnHandle_t handle,
|
| 825 |
+
float dropout,
|
| 826 |
+
void *states,
|
| 827 |
+
size_t stateSizeInBytes,
|
| 828 |
+
unsigned long long seed);
|
| 829 |
+
|
| 830 |
+
/* Restores the dropout descriptor to a previously saved-off state */
|
| 831 |
+
cudnnStatus_t CUDNNWINAPI
|
| 832 |
+
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
|
| 833 |
+
cudnnHandle_t handle,
|
| 834 |
+
float dropout,
|
| 835 |
+
void *states,
|
| 836 |
+
size_t stateSizeInBytes,
|
| 837 |
+
unsigned long long seed);
|
| 838 |
+
|
| 839 |
+
cudnnStatus_t CUDNNWINAPI
|
| 840 |
+
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
|
| 841 |
+
cudnnHandle_t handle,
|
| 842 |
+
float *dropout,
|
| 843 |
+
void **states,
|
| 844 |
+
unsigned long long *seed);
|
| 845 |
+
|
| 846 |
+
cudnnStatus_t CUDNNWINAPI
|
| 847 |
+
cudnnDropoutForward(cudnnHandle_t handle,
|
| 848 |
+
const cudnnDropoutDescriptor_t dropoutDesc,
|
| 849 |
+
const cudnnTensorDescriptor_t xdesc,
|
| 850 |
+
const void *x,
|
| 851 |
+
const cudnnTensorDescriptor_t ydesc,
|
| 852 |
+
void *y,
|
| 853 |
+
void *reserveSpace,
|
| 854 |
+
size_t reserveSpaceSizeInBytes);
|
| 855 |
+
|
| 856 |
+
/* TODO: move these enums out to the appropriate submodule */
|
| 857 |
+
typedef enum {
|
| 858 |
+
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
|
| 859 |
+
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
|
| 860 |
+
CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
|
| 861 |
+
CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
|
| 862 |
+
CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
|
| 863 |
+
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
|
| 864 |
+
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
|
| 865 |
+
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
|
| 866 |
+
CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
|
| 867 |
+
} cudnnConvolutionFwdAlgo_t;
|
| 868 |
+
|
| 869 |
+
typedef enum {
|
| 870 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
|
| 871 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
|
| 872 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
|
| 873 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
|
| 874 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
|
| 875 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
|
| 876 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
|
| 877 |
+
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
|
| 878 |
+
} cudnnConvolutionBwdFilterAlgo_t;
|
| 879 |
+
|
| 880 |
+
typedef enum {
|
| 881 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
|
| 882 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
|
| 883 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
|
| 884 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
|
| 885 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
|
| 886 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
|
| 887 |
+
CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
|
| 888 |
+
} cudnnConvolutionBwdDataAlgo_t;
|
| 889 |
+
|
| 890 |
+
typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;
|
| 891 |
+
|
| 892 |
+
/*
|
| 893 |
+
* \brief Cross-library version checker.
|
| 894 |
+
* This function is implemented differently in each sub-library. Each sublib
|
| 895 |
+
* checks whether its own version matches that of its dependencies.
|
| 896 |
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
| 897 |
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
| 898 |
+
*/
|
| 899 |
+
cudnnStatus_t CUDNNWINAPI
|
| 900 |
+
cudnnOpsVersionCheck(void);
|
| 901 |
+
|
| 902 |
+
/* Function to perform backward softmax */
|
| 903 |
+
cudnnStatus_t CUDNNWINAPI
|
| 904 |
+
cudnnSoftmaxBackward(cudnnHandle_t handle,
|
| 905 |
+
cudnnSoftmaxAlgorithm_t algo,
|
| 906 |
+
cudnnSoftmaxMode_t mode,
|
| 907 |
+
const void *alpha,
|
| 908 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 909 |
+
const void *y,
|
| 910 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 911 |
+
const void *dy,
|
| 912 |
+
const void *beta,
|
| 913 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 914 |
+
void *dx);
|
| 915 |
+
|
| 916 |
+
/* Function to perform backward pooling */
|
| 917 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 918 |
+
cudnnPoolingBackward(cudnnHandle_t handle,
|
| 919 |
+
const cudnnPoolingDescriptor_t poolingDesc,
|
| 920 |
+
const void *alpha,
|
| 921 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 922 |
+
const void *y,
|
| 923 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 924 |
+
const void *dy,
|
| 925 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 926 |
+
const void *x,
|
| 927 |
+
const void *beta,
|
| 928 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 929 |
+
void *dx);
|
| 930 |
+
|
| 931 |
+
/* Function to perform backward activation */
|
| 932 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 933 |
+
cudnnActivationBackward(cudnnHandle_t handle,
|
| 934 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 935 |
+
const void *alpha,
|
| 936 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 937 |
+
const void *y,
|
| 938 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 939 |
+
const void *dy,
|
| 940 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 941 |
+
const void *x,
|
| 942 |
+
const void *beta,
|
| 943 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 944 |
+
void *dx);
|
| 945 |
+
|
| 946 |
+
/* LRN cross-channel backward computation. Double parameters cast to tensor data type */
|
| 947 |
+
cudnnStatus_t CUDNNWINAPI
|
| 948 |
+
cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
|
| 949 |
+
cudnnLRNDescriptor_t normDesc,
|
| 950 |
+
cudnnLRNMode_t lrnMode,
|
| 951 |
+
const void *alpha,
|
| 952 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 953 |
+
const void *y,
|
| 954 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 955 |
+
const void *dy,
|
| 956 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 957 |
+
const void *x,
|
| 958 |
+
const void *beta,
|
| 959 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 960 |
+
void *dx);
|
| 961 |
+
|
| 962 |
+
cudnnStatus_t CUDNNWINAPI
|
| 963 |
+
cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
|
| 964 |
+
cudnnLRNDescriptor_t normDesc,
|
| 965 |
+
cudnnDivNormMode_t mode,
|
| 966 |
+
const void *alpha,
|
| 967 |
+
const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
|
| 968 |
+
const void *x,
|
| 969 |
+
const void *means, /* if NULL, means are assumed to be zero */
|
| 970 |
+
const void *dy,
|
| 971 |
+
void *temp,
|
| 972 |
+
void *temp2,
|
| 973 |
+
const void *beta,
|
| 974 |
+
const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
|
| 975 |
+
void *dx, /* output x differential */
|
| 976 |
+
void *dMeans); /* output means differential, can be NULL */
|
| 977 |
+
|
| 978 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 979 |
+
cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
|
| 980 |
+
cudnnBatchNormMode_t mode,
|
| 981 |
+
cudnnBatchNormOps_t bnOps,
|
| 982 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 983 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 984 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 985 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 986 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 987 |
+
size_t *sizeInBytes);
|
| 988 |
+
|
| 989 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 990 |
+
cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
|
| 991 |
+
cudnnBatchNormMode_t mode,
|
| 992 |
+
cudnnBatchNormOps_t bnOps,
|
| 993 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 994 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 995 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 996 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 997 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 998 |
+
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
|
| 999 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1000 |
+
size_t *sizeInBytes);
|
| 1001 |
+
|
| 1002 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1003 |
+
cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
|
| 1004 |
+
cudnnBatchNormMode_t mode,
|
| 1005 |
+
cudnnBatchNormOps_t bnOps,
|
| 1006 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1007 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1008 |
+
size_t *sizeInBytes);
|
| 1009 |
+
|
| 1010 |
+
/* Computes y = BN(x). Also accumulates moving averages of mean and inverse variances */
|
| 1011 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1012 |
+
cudnnBatchNormalizationForwardTraining(
|
| 1013 |
+
cudnnHandle_t handle,
|
| 1014 |
+
cudnnBatchNormMode_t mode,
|
| 1015 |
+
|
| 1016 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 1017 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 1018 |
+
|
| 1019 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1020 |
+
const void *x, /* NxCxHxW */
|
| 1021 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1022 |
+
void *y, /* NxCxHxW */
|
| 1023 |
+
|
| 1024 |
+
/* Shared desc for the next 6 tensors in the argument list.
|
| 1025 |
+
Data type to be set as follows:
|
| 1026 |
+
type = (typeOf(x) == double) ? double : float
|
| 1027 |
+
Dimensions for this descriptor depend on normalization mode
|
| 1028 |
+
- Spatial Normalization : tensors are expected to have dims 1xCx1x1
|
| 1029 |
+
(normalization is performed across NxHxW)
|
| 1030 |
+
- Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
|
| 1031 |
+
(normalization is performed across N) */
|
| 1032 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 1033 |
+
|
| 1034 |
+
/* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
|
| 1035 |
+
const void *bnScale,
|
| 1036 |
+
const void *bnBias,
|
| 1037 |
+
|
| 1038 |
+
/* MUST use factor=1 in the very first call of a complete training cycle.
|
| 1039 |
+
Use a factor=1/(1+n) at N-th call to the function to get
|
| 1040 |
+
Cumulative Moving Average (CMA) behavior
|
| 1041 |
+
CMA[n] = (x[1]+...+x[n])/n
|
| 1042 |
+
Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
|
| 1043 |
+
((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
|
| 1044 |
+
CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
|
| 1045 |
+
double exponentialAverageFactor,
|
| 1046 |
+
|
| 1047 |
+
/* Used in Training phase only.
|
| 1048 |
+
runningMean = newMean*factor + runningMean*(1-factor) */
|
| 1049 |
+
void *resultRunningMean,
|
| 1050 |
+
/* Output in training mode, input in inference. Is the moving average
|
| 1051 |
+
of variance[x] (factor is applied in the same way as for runningMean) */
|
| 1052 |
+
void *resultRunningVariance,
|
| 1053 |
+
|
| 1054 |
+
/* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
|
| 1055 |
+
double epsilon,
|
| 1056 |
+
|
| 1057 |
+
/* Optionally save intermediate results from the forward pass here
|
| 1058 |
+
- can be reused to speed up backward pass. NULL if unused */
|
| 1059 |
+
void *resultSaveMean,
|
| 1060 |
+
void *resultSaveInvVariance);
|
| 1061 |
+
|
| 1062 |
+
/* Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances */
|
| 1063 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1064 |
+
cudnnBatchNormalizationForwardTrainingEx(
|
| 1065 |
+
cudnnHandle_t handle,
|
| 1066 |
+
cudnnBatchNormMode_t mode,
|
| 1067 |
+
cudnnBatchNormOps_t bnOps,
|
| 1068 |
+
|
| 1069 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 1070 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 1071 |
+
|
| 1072 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1073 |
+
const void *xData,
|
| 1074 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 1075 |
+
const void *zData,
|
| 1076 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1077 |
+
void *yData,
|
| 1078 |
+
|
| 1079 |
+
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
|
| 1080 |
+
const void *bnScale,
|
| 1081 |
+
const void *bnBias,
|
| 1082 |
+
|
| 1083 |
+
double exponentialAverageFactor,
|
| 1084 |
+
void *resultRunningMean,
|
| 1085 |
+
void *resultRunningVariance,
|
| 1086 |
+
|
| 1087 |
+
/* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
|
| 1088 |
+
double epsilon,
|
| 1089 |
+
|
| 1090 |
+
/* Optionally save intermediate results from the forward pass here
|
| 1091 |
+
- can be reused to speed up backward pass. NULL if unused */
|
| 1092 |
+
void *resultSaveMean,
|
| 1093 |
+
void *resultSaveInvVariance,
|
| 1094 |
+
|
| 1095 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1096 |
+
void *workspace,
|
| 1097 |
+
size_t workSpaceSizeInBytes,
|
| 1098 |
+
void *reserveSpace,
|
| 1099 |
+
size_t reserveSpaceSizeInBytes);
|
| 1100 |
+
|
| 1101 |
+
/* Performs backward pass of Batch Normalization layer. Returns x gradient,
|
| 1102 |
+
* bnScale gradient and bnBias gradient */
|
| 1103 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1104 |
+
cudnnBatchNormalizationBackward(cudnnHandle_t handle,
|
| 1105 |
+
cudnnBatchNormMode_t mode,
|
| 1106 |
+
const void *alphaDataDiff,
|
| 1107 |
+
const void *betaDataDiff,
|
| 1108 |
+
const void *alphaParamDiff,
|
| 1109 |
+
const void *betaParamDiff,
|
| 1110 |
+
const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
|
| 1111 |
+
const void *x,
|
| 1112 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1113 |
+
const void *dy,
|
| 1114 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1115 |
+
void *dx,
|
| 1116 |
+
/* Shared tensor desc for the 4 tensors below */
|
| 1117 |
+
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
|
| 1118 |
+
const void *bnScale, /* bnBias doesn't affect backpropagation */
|
| 1119 |
+
/* scale and bias diff are not backpropagated below this layer */
|
| 1120 |
+
void *dBnScaleResult,
|
| 1121 |
+
void *dBnBiasResult,
|
| 1122 |
+
/* Same epsilon as forward pass */
|
| 1123 |
+
double epsilon,
|
| 1124 |
+
|
| 1125 |
+
/* Optionally cached intermediate results from
|
| 1126 |
+
forward pass */
|
| 1127 |
+
const void *savedMean,
|
| 1128 |
+
const void *savedInvVariance);
|
| 1129 |
+
|
| 1130 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1131 |
+
cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
|
| 1132 |
+
cudnnBatchNormMode_t mode,
|
| 1133 |
+
cudnnBatchNormOps_t bnOps,
|
| 1134 |
+
|
| 1135 |
+
const void *alphaDataDiff,
|
| 1136 |
+
const void *betaDataDiff,
|
| 1137 |
+
const void *alphaParamDiff,
|
| 1138 |
+
const void *betaParamDiff,
|
| 1139 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1140 |
+
const void *xData,
|
| 1141 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1142 |
+
const void *yData,
|
| 1143 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1144 |
+
const void *dyData,
|
| 1145 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 1146 |
+
void *dzData,
|
| 1147 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1148 |
+
void *dxData,
|
| 1149 |
+
|
| 1150 |
+
/* Shared tensor desc for the 4 tensors below */
|
| 1151 |
+
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
|
| 1152 |
+
const void *bnScaleData,
|
| 1153 |
+
const void *bnBiasData, /* needed if there is activation */
|
| 1154 |
+
void *dBnScaleData,
|
| 1155 |
+
void *dBnBiasData,
|
| 1156 |
+
double epsilon, /* Same epsilon as forward pass */
|
| 1157 |
+
|
| 1158 |
+
/* Optionally cached intermediate results from
|
| 1159 |
+
forward pass */
|
| 1160 |
+
const void *savedMean,
|
| 1161 |
+
const void *savedInvVariance,
|
| 1162 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1163 |
+
void *workSpace,
|
| 1164 |
+
size_t workSpaceSizeInBytes,
|
| 1165 |
+
void *reserveSpace,
|
| 1166 |
+
size_t reserveSpaceSizeInBytes);
|
| 1167 |
+
|
| 1168 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1169 |
+
cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
|
| 1170 |
+
cudnnNormMode_t mode,
|
| 1171 |
+
cudnnNormOps_t normOps,
|
| 1172 |
+
cudnnNormAlgo_t algo,
|
| 1173 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1174 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 1175 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1176 |
+
const cudnnTensorDescriptor_t normScaleBiasDesc,
|
| 1177 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1178 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1179 |
+
size_t *sizeInBytes,
|
| 1180 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1181 |
+
|
| 1182 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1183 |
+
cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
|
| 1184 |
+
cudnnNormMode_t mode,
|
| 1185 |
+
cudnnNormOps_t normOps,
|
| 1186 |
+
cudnnNormAlgo_t algo,
|
| 1187 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1188 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1189 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1190 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 1191 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1192 |
+
const cudnnTensorDescriptor_t dNormScaleBiasDesc,
|
| 1193 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1194 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1195 |
+
size_t *sizeInBytes,
|
| 1196 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1197 |
+
|
| 1198 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1199 |
+
cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
|
| 1200 |
+
cudnnNormMode_t mode,
|
| 1201 |
+
cudnnNormOps_t normOps,
|
| 1202 |
+
cudnnNormAlgo_t algo,
|
| 1203 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 1204 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1205 |
+
size_t *sizeInBytes,
|
| 1206 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1207 |
+
|
| 1208 |
+
/* Computes y = relu(Norm(x) + z). Also accumulates moving averages of mean and inverse variances */
|
| 1209 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1210 |
+
cudnnNormalizationForwardTraining(cudnnHandle_t handle,
|
| 1211 |
+
cudnnNormMode_t mode,
|
| 1212 |
+
cudnnNormOps_t normOps,
|
| 1213 |
+
cudnnNormAlgo_t algo,
|
| 1214 |
+
const void *alpha, /* alpha[0] = result blend factor */
|
| 1215 |
+
const void *beta, /* beta[0] = dest layer blend factor */
|
| 1216 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1217 |
+
const void *xData,
|
| 1218 |
+
const cudnnTensorDescriptor_t normScaleBiasDesc,
|
| 1219 |
+
const void *normScale,
|
| 1220 |
+
const void *normBias,
|
| 1221 |
+
double exponentialAverageFactor,
|
| 1222 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1223 |
+
void *resultRunningMean,
|
| 1224 |
+
void *resultRunningVariance,
|
| 1225 |
+
/* Has to be >= 0. Should be the same in forward and backward functions. */
|
| 1226 |
+
double epsilon,
|
| 1227 |
+
/* Optionally save intermediate results from the forward pass here
|
| 1228 |
+
- can be reused to speed up backward pass. NULL if unused */
|
| 1229 |
+
void *resultSaveMean,
|
| 1230 |
+
void *resultSaveInvVariance,
|
| 1231 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1232 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 1233 |
+
const void *zData,
|
| 1234 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1235 |
+
void *yData,
|
| 1236 |
+
void *workspace,
|
| 1237 |
+
size_t workSpaceSizeInBytes,
|
| 1238 |
+
void *reserveSpace,
|
| 1239 |
+
size_t reserveSpaceSizeInBytes,
|
| 1240 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1241 |
+
|
| 1242 |
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
| 1243 |
+
cudnnNormalizationBackward(cudnnHandle_t handle,
|
| 1244 |
+
cudnnNormMode_t mode,
|
| 1245 |
+
cudnnNormOps_t normOps,
|
| 1246 |
+
cudnnNormAlgo_t algo,
|
| 1247 |
+
const void *alphaDataDiff,
|
| 1248 |
+
const void *betaDataDiff,
|
| 1249 |
+
const void *alphaParamDiff,
|
| 1250 |
+
const void *betaParamDiff,
|
| 1251 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1252 |
+
const void *xData,
|
| 1253 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 1254 |
+
const void *yData,
|
| 1255 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1256 |
+
const void *dyData,
|
| 1257 |
+
const cudnnTensorDescriptor_t dzDesc,
|
| 1258 |
+
void *dzData,
|
| 1259 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1260 |
+
void *dxData,
|
| 1261 |
+
/* Shared tensor desc for the 4 tensors below */
|
| 1262 |
+
const cudnnTensorDescriptor_t dNormScaleBiasDesc,
|
| 1263 |
+
const void *normScaleData,
|
| 1264 |
+
const void *normBiasData, /* needed if there is activation */
|
| 1265 |
+
void *dNormScaleData,
|
| 1266 |
+
void *dNormBiasData,
|
| 1267 |
+
double epsilon, /* Same epsilon as forward pass */
|
| 1268 |
+
const cudnnTensorDescriptor_t normMeanVarDesc,
|
| 1269 |
+
/* Optionally cached intermediate results from
|
| 1270 |
+
forward pass */
|
| 1271 |
+
const void *savedMean,
|
| 1272 |
+
const void *savedInvVariance,
|
| 1273 |
+
cudnnActivationDescriptor_t activationDesc,
|
| 1274 |
+
void *workSpace,
|
| 1275 |
+
size_t workSpaceSizeInBytes,
|
| 1276 |
+
void *reserveSpace,
|
| 1277 |
+
size_t reserveSpaceSizeInBytes,
|
| 1278 |
+
int groupCnt); /* Place hold for future work, should be set to 1 now*/
|
| 1279 |
+
|
| 1280 |
+
cudnnStatus_t CUDNNWINAPI
|
| 1281 |
+
cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
|
| 1282 |
+
const cudnnSpatialTransformerDescriptor_t stDesc,
|
| 1283 |
+
const void *dgrid,
|
| 1284 |
+
void *dtheta);
|
| 1285 |
+
|
| 1286 |
+
cudnnStatus_t CUDNNWINAPI
|
| 1287 |
+
cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
|
| 1288 |
+
cudnnSpatialTransformerDescriptor_t stDesc,
|
| 1289 |
+
const void *alpha,
|
| 1290 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 1291 |
+
const void *x,
|
| 1292 |
+
const void *beta,
|
| 1293 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 1294 |
+
void *dx,
|
| 1295 |
+
const void *alphaDgrid,
|
| 1296 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 1297 |
+
const void *dy,
|
| 1298 |
+
const void *grid,
|
| 1299 |
+
const void *betaDgrid,
|
| 1300 |
+
void *dgrid);
|
| 1301 |
+
|
| 1302 |
+
cudnnStatus_t CUDNNWINAPI
|
| 1303 |
+
cudnnDropoutBackward(cudnnHandle_t handle,
|
| 1304 |
+
const cudnnDropoutDescriptor_t dropoutDesc,
|
| 1305 |
+
const cudnnTensorDescriptor_t dydesc,
|
| 1306 |
+
const void *dy,
|
| 1307 |
+
const cudnnTensorDescriptor_t dxdesc,
|
| 1308 |
+
void *dx,
|
| 1309 |
+
void *reserveSpace,
|
| 1310 |
+
size_t reserveSpaceSizeInBytes);
|
| 1311 |
+
|
| 1312 |
+
#if defined(__cplusplus)
|
| 1313 |
+
}
|
| 1314 |
+
#endif
|
| 1315 |
+
|
| 1316 |
+
#endif /* CUDNN_OPS_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_v9.h
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/* cudnn : Neural Networks Library */
|
| 51 |
+
|
| 52 |
+
#if !defined(CUDNN_H_)
|
| 53 |
+
#define CUDNN_H_
|
| 54 |
+
#if defined(__cplusplus)
|
| 55 |
+
extern "C" {
|
| 56 |
+
#endif
|
| 57 |
+
|
| 58 |
+
#include <cuda_runtime_api.h>
|
| 59 |
+
#include "cudnn_version.h"
|
| 60 |
+
#include "cudnn_graph.h"
|
| 61 |
+
#include "cudnn_ops.h"
|
| 62 |
+
#include "cudnn_adv.h"
|
| 63 |
+
#include "cudnn_cnn.h"
|
| 64 |
+
|
| 65 |
+
#if defined(__cplusplus)
|
| 66 |
+
}
|
| 67 |
+
#endif
|
| 68 |
+
#endif /* CUDNN_H_ */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_version.h
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/**
|
| 51 |
+
* \file: The master cuDNN version file.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#ifndef CUDNN_VERSION_H_
|
| 55 |
+
#define CUDNN_VERSION_H_
|
| 56 |
+
|
| 57 |
+
#define CUDNN_MAJOR 9
|
| 58 |
+
#define CUDNN_MINOR 10
|
| 59 |
+
#define CUDNN_PATCHLEVEL 2
|
| 60 |
+
|
| 61 |
+
#define CUDNN_VERSION (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
|
| 62 |
+
|
| 63 |
+
/* cannot use constexpr here since this is a C-only file */
|
| 64 |
+
/* Below is the max SM version this cuDNN library is aware of and supports natively */
|
| 65 |
+
|
| 66 |
+
#define CUDNN_MAX_SM_MAJOR_NUMBER 12
|
| 67 |
+
#define CUDNN_MAX_SM_MINOR_NUMBER 0
|
| 68 |
+
#define CUDNN_MAX_DEVICE_VERSION (CUDNN_MAX_SM_MAJOR_NUMBER * 100 + CUDNN_MAX_SM_MINOR_NUMBER * 10)
|
| 69 |
+
|
| 70 |
+
#endif /* CUDNN_VERSION_H */
|
.venv/lib/python3.12/site-packages/nvidia/cudnn/include/cudnn_version_v9.h
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/**
|
| 51 |
+
* \file: The master cuDNN version file.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#ifndef CUDNN_VERSION_H_
|
| 55 |
+
#define CUDNN_VERSION_H_
|
| 56 |
+
|
| 57 |
+
#define CUDNN_MAJOR 9
|
| 58 |
+
#define CUDNN_MINOR 10
|
| 59 |
+
#define CUDNN_PATCHLEVEL 2
|
| 60 |
+
|
| 61 |
+
#define CUDNN_VERSION (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
|
| 62 |
+
|
| 63 |
+
/* cannot use constexpr here since this is a C-only file */
|
| 64 |
+
/* Below is the max SM version this cuDNN library is aware of and supports natively */
|
| 65 |
+
|
| 66 |
+
#define CUDNN_MAX_SM_MAJOR_NUMBER 12
|
| 67 |
+
#define CUDNN_MAX_SM_MINOR_NUMBER 0
|
| 68 |
+
#define CUDNN_MAX_DEVICE_VERSION (CUDNN_MAX_SM_MAJOR_NUMBER * 100 + CUDNN_MAX_SM_MINOR_NUMBER * 10)
|
| 69 |
+
|
| 70 |
+
#endif /* CUDNN_VERSION_H */
|
.venv/lib/python3.12/site-packages/nvidia_nccl_cu12-2.27.3.dist-info/licenses/License.txt
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
| 3 |
+
|
| 4 |
+
Redistribution and use in source and binary forms, with or without
|
| 5 |
+
modification, are permitted provided that the following conditions
|
| 6 |
+
are met:
|
| 7 |
+
* Redistributions of source code must retain the above copyright
|
| 8 |
+
notice, this list of conditions and the following disclaimer.
|
| 9 |
+
* Redistributions in binary form must reproduce the above copyright
|
| 10 |
+
notice, this list of conditions and the following disclaimer in the
|
| 11 |
+
documentation and/or other materials provided with the distribution.
|
| 12 |
+
* Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
|
| 13 |
+
Laboratory, the U.S. Department of Energy, nor the names of their
|
| 14 |
+
contributors may be used to endorse or promote products derived
|
| 15 |
+
from this software without specific prior written permission.
|
| 16 |
+
|
| 17 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
| 18 |
+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| 19 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 20 |
+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
| 21 |
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
| 22 |
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
| 23 |
+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
| 24 |
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
| 25 |
+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 26 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 27 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 28 |
+
|
| 29 |
+
The U.S. Department of Energy funded the development of this software
|
| 30 |
+
under subcontract 7078610 with Lawrence Berkeley National Laboratory.
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
This code also includes files from the NVIDIA Tools Extension SDK project.
|
| 34 |
+
|
| 35 |
+
See:
|
| 36 |
+
|
| 37 |
+
https://github.com/NVIDIA/NVTX
|
| 38 |
+
|
| 39 |
+
for more information and license details.
|
.venv/lib/python3.12/site-packages/sklearn/__check_build/__init__.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module to give helpful messages to the user that did not
|
| 2 |
+
compile scikit-learn properly.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# Authors: The scikit-learn developers
|
| 6 |
+
# SPDX-License-Identifier: BSD-3-Clause
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
INPLACE_MSG = """
|
| 11 |
+
It appears that you are importing a local scikit-learn source tree. For
|
| 12 |
+
this, you need to have an inplace install. Maybe you are in the source
|
| 13 |
+
directory and you need to try from another location."""
|
| 14 |
+
|
| 15 |
+
STANDARD_MSG = """
|
| 16 |
+
If you have used an installer, please check that it is suited for your
|
| 17 |
+
Python version, your operating system and your platform."""
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def raise_build_error(e):
|
| 21 |
+
# Raise a comprehensible error and list the contents of the
|
| 22 |
+
# directory to help debugging on the mailing list.
|
| 23 |
+
local_dir = os.path.split(__file__)[0]
|
| 24 |
+
msg = STANDARD_MSG
|
| 25 |
+
if local_dir == "sklearn/__check_build":
|
| 26 |
+
# Picking up the local install: this will work only if the
|
| 27 |
+
# install is an 'inplace build'
|
| 28 |
+
msg = INPLACE_MSG
|
| 29 |
+
dir_content = list()
|
| 30 |
+
for i, filename in enumerate(os.listdir(local_dir)):
|
| 31 |
+
if (i + 1) % 3:
|
| 32 |
+
dir_content.append(filename.ljust(26))
|
| 33 |
+
else:
|
| 34 |
+
dir_content.append(filename + "\n")
|
| 35 |
+
raise ImportError(
|
| 36 |
+
"""%s
|
| 37 |
+
___________________________________________________________________________
|
| 38 |
+
Contents of %s:
|
| 39 |
+
%s
|
| 40 |
+
___________________________________________________________________________
|
| 41 |
+
It seems that scikit-learn has not been built correctly.
|
| 42 |
+
|
| 43 |
+
If you have installed scikit-learn from source, please do not forget
|
| 44 |
+
to build the package before using it. For detailed instructions, see:
|
| 45 |
+
https://scikit-learn.org/dev/developers/advanced_installation.html#building-from-source
|
| 46 |
+
%s"""
|
| 47 |
+
% (e, local_dir, "".join(dir_content).strip(), msg)
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
try:
|
| 52 |
+
from ._check_build import check_build # noqa: F401
|
| 53 |
+
except ImportError as e:
|
| 54 |
+
raise_build_error(e)
|
.venv/lib/python3.12/site-packages/sklearn/__check_build/_check_build.cpython-312-x86_64-linux-gnu.so
ADDED
|
Binary file (45.3 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/__check_build/_check_build.pyx
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def check_build():
|
| 2 |
+
return
|
.venv/lib/python3.12/site-packages/sklearn/__check_build/meson.build
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
py.extension_module(
|
| 2 |
+
'_check_build',
|
| 3 |
+
cython_gen.process('_check_build.pyx'),
|
| 4 |
+
install: true,
|
| 5 |
+
subdir: 'sklearn/__check_build',
|
| 6 |
+
)
|
.venv/lib/python3.12/site-packages/sklearn/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (3.12 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/__pycache__/_built_with_meson.cpython-312.pyc
ADDED
|
Binary file (193 Bytes). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/__pycache__/_config.cpython-312.pyc
ADDED
|
Binary file (14.2 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/__pycache__/_distributor_init.cpython-312.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/__pycache__/base.cpython-312.pyc
ADDED
|
Binary file (51.3 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/__pycache__/exceptions.cpython-312.pyc
ADDED
|
Binary file (9.49 kB). View file
|
|
|
.venv/lib/python3.12/site-packages/sklearn/_build_utils/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.12/site-packages/sklearn/_build_utils/tempita.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
# Authors: The scikit-learn developers
|
| 4 |
+
# SPDX-License-Identifier: BSD-3-Clause
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
from Cython import Tempita as tempita
|
| 10 |
+
|
| 11 |
+
# XXX: If this import ever fails (does it really?), vendor either
|
| 12 |
+
# cython.tempita or numpy/npy_tempita.
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def process_tempita(fromfile, outfile=None):
|
| 16 |
+
"""Process tempita templated file and write out the result.
|
| 17 |
+
|
| 18 |
+
The template file is expected to end in `.c.tp` or `.pyx.tp`:
|
| 19 |
+
E.g. processing `template.c.in` generates `template.c`.
|
| 20 |
+
|
| 21 |
+
"""
|
| 22 |
+
with open(fromfile, "r", encoding="utf-8") as f:
|
| 23 |
+
template_content = f.read()
|
| 24 |
+
|
| 25 |
+
template = tempita.Template(template_content)
|
| 26 |
+
content = template.substitute()
|
| 27 |
+
|
| 28 |
+
with open(outfile, "w", encoding="utf-8") as f:
|
| 29 |
+
f.write(content)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def main():
|
| 33 |
+
parser = argparse.ArgumentParser()
|
| 34 |
+
parser.add_argument("infile", type=str, help="Path to the input file")
|
| 35 |
+
parser.add_argument("-o", "--outdir", type=str, help="Path to the output directory")
|
| 36 |
+
parser.add_argument(
|
| 37 |
+
"-i",
|
| 38 |
+
"--ignore",
|
| 39 |
+
type=str,
|
| 40 |
+
help=(
|
| 41 |
+
"An ignored input - may be useful to add a "
|
| 42 |
+
"dependency between custom targets"
|
| 43 |
+
),
|
| 44 |
+
)
|
| 45 |
+
args = parser.parse_args()
|
| 46 |
+
|
| 47 |
+
if not args.infile.endswith(".tp"):
|
| 48 |
+
raise ValueError(f"Unexpected extension: {args.infile}")
|
| 49 |
+
|
| 50 |
+
if not args.outdir:
|
| 51 |
+
raise ValueError("Missing `--outdir` argument to tempita.py")
|
| 52 |
+
|
| 53 |
+
outdir_abs = os.path.join(os.getcwd(), args.outdir)
|
| 54 |
+
outfile = os.path.join(
|
| 55 |
+
outdir_abs, os.path.splitext(os.path.split(args.infile)[1])[0]
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
process_tempita(args.infile, outfile)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
main()
|
.venv/lib/python3.12/site-packages/sklearn/_build_utils/version.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Extract version number from __init__.py"""
|
| 3 |
+
|
| 4 |
+
# Authors: The scikit-learn developers
|
| 5 |
+
# SPDX-License-Identifier: BSD-3-Clause
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# Locate sklearn/__init__.py relative to this helper's own location.
sklearn_init = os.path.join(os.path.dirname(__file__), "../__init__.py")

# Read with an explicit encoding and close the handle deterministically;
# the previous bare `open(...).readlines()` leaked the file object.
with open(sklearn_init, encoding="utf-8") as f:
    data = f.readlines()
version_line = next(line for line in data if line.startswith("__version__"))

# `__version__ = "1.5.0"` -> `1.5.0` (quotes of either style are stripped).
version = version_line.strip().split(" = ")[1].replace('"', "").replace("'", "")

print(version)
|
.venv/lib/python3.12/site-packages/sklearn/_loss/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
The :mod:`sklearn._loss` module includes loss function classes suitable for
|
| 3 |
+
fitting classification and regression tasks.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
# Authors: The scikit-learn developers
|
| 7 |
+
# SPDX-License-Identifier: BSD-3-Clause
|
| 8 |
+
|
| 9 |
+
from .loss import (
|
| 10 |
+
AbsoluteError,
|
| 11 |
+
HalfBinomialLoss,
|
| 12 |
+
HalfGammaLoss,
|
| 13 |
+
HalfMultinomialLoss,
|
| 14 |
+
HalfPoissonLoss,
|
| 15 |
+
HalfSquaredError,
|
| 16 |
+
HalfTweedieLoss,
|
| 17 |
+
HalfTweedieLossIdentity,
|
| 18 |
+
HuberLoss,
|
| 19 |
+
PinballLoss,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
__all__ = [
|
| 23 |
+
"AbsoluteError",
|
| 24 |
+
"HalfBinomialLoss",
|
| 25 |
+
"HalfGammaLoss",
|
| 26 |
+
"HalfMultinomialLoss",
|
| 27 |
+
"HalfPoissonLoss",
|
| 28 |
+
"HalfSquaredError",
|
| 29 |
+
"HalfTweedieLoss",
|
| 30 |
+
"HalfTweedieLossIdentity",
|
| 31 |
+
"HuberLoss",
|
| 32 |
+
"PinballLoss",
|
| 33 |
+
]
|
.venv/lib/python3.12/site-packages/sklearn/_loss/_loss.pxd
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Fused types for input like y_true, raw_prediction, sample_weights.
ctypedef fused floating_in:
    double
    float


# Fused types for output like gradient and hessian
# We use a different fused types for input (floating_in) and output (floating_out), such
# that input and output can have different dtypes in the same function call. A single
# fused type can only take on one single value (type) for all arguments in one function
# call.
ctypedef fused floating_out:
    double
    float


# Struct to return 2 doubles
ctypedef struct double_pair:
    double val1
    double val2


# C base class for loss functions
# Each method operates on a single sample (scalar y_true / raw_prediction).
cdef class CyLossFunction:
    # Per-sample loss value.
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    # Per-sample gradient w.r.t. raw_prediction.
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    # Per-sample gradient and hessian, packed in a double_pair (val1, val2).
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# Concrete loss declarations. Each subclass redeclares the three per-sample
# methods of CyLossFunction; losses with a shape parameter expose it as a
# cdef attribute.
cdef class CyHalfSquaredError(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyAbsoluteError(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyPinballLoss(CyLossFunction):
    cdef readonly double quantile  # readonly makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHuberLoss(CyLossFunction):
    cdef public double delta  # public makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHalfPoissonLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHalfGammaLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHalfTweedieLoss(CyLossFunction):
    cdef readonly double power  # readonly makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHalfTweedieLossIdentity(CyLossFunction):
    cdef readonly double power  # readonly makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHalfBinomialLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyExponentialLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


# Multinomial loss is not a CyLossFunction subclass: it is multiclass, so its
# gradient works on a vector of raw predictions (one entry per class) and
# writes into a preallocated output buffer.
cdef class CyHalfMultinomialLoss():
    cdef void cy_gradient(
        self,
        const floating_in y_true,
        const floating_in[::1] raw_prediction,
        const floating_in sample_weight,
        floating_out[::1] gradient_out,
    ) noexcept nogil
|
.venv/lib/python3.12/site-packages/sklearn/_loss/_loss.pyx.tp
ADDED
|
@@ -0,0 +1,1505 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{py:
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Template file to easily generate loops over samples using Tempita
|
| 5 |
+
(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).
|
| 6 |
+
|
| 7 |
+
Generated file: _loss.pyx
|
| 8 |
+
|
| 9 |
+
Each loss class is generated by a cdef functions on single samples.
|
| 10 |
+
The keywords between double braces are substituted during the build.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
doc_HalfSquaredError = (
|
| 14 |
+
"""Half Squared Error with identity link.
|
| 15 |
+
|
| 16 |
+
Domain:
|
| 17 |
+
y_true and y_pred all real numbers
|
| 18 |
+
|
| 19 |
+
Link:
|
| 20 |
+
y_pred = raw_prediction
|
| 21 |
+
"""
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
doc_AbsoluteError = (
|
| 25 |
+
"""Absolute Error with identity link.
|
| 26 |
+
|
| 27 |
+
Domain:
|
| 28 |
+
y_true and y_pred all real numbers
|
| 29 |
+
|
| 30 |
+
Link:
|
| 31 |
+
y_pred = raw_prediction
|
| 32 |
+
"""
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
doc_PinballLoss = (
|
| 36 |
+
"""Quantile Loss aka Pinball Loss with identity link.
|
| 37 |
+
|
| 38 |
+
Domain:
|
| 39 |
+
y_true and y_pred all real numbers
|
| 40 |
+
quantile in (0, 1)
|
| 41 |
+
|
| 42 |
+
Link:
|
| 43 |
+
y_pred = raw_prediction
|
| 44 |
+
|
| 45 |
+
Note: 2 * cPinballLoss(quantile=0.5) equals cAbsoluteError()
|
| 46 |
+
"""
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
doc_HuberLoss = (
|
| 50 |
+
"""Huber Loss with identity link.
|
| 51 |
+
|
| 52 |
+
Domain:
|
| 53 |
+
y_true and y_pred all real numbers
|
| 54 |
+
delta in positive real numbers
|
| 55 |
+
|
| 56 |
+
Link:
|
| 57 |
+
y_pred = raw_prediction
|
| 58 |
+
"""
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
doc_HalfPoissonLoss = (
|
| 62 |
+
"""Half Poisson deviance loss with log-link.
|
| 63 |
+
|
| 64 |
+
Domain:
|
| 65 |
+
y_true in non-negative real numbers
|
| 66 |
+
y_pred in positive real numbers
|
| 67 |
+
|
| 68 |
+
Link:
|
| 69 |
+
y_pred = exp(raw_prediction)
|
| 70 |
+
|
| 71 |
+
Half Poisson deviance with log-link is
|
| 72 |
+
y_true * log(y_true/y_pred) + y_pred - y_true
|
| 73 |
+
= y_true * log(y_true) - y_true * raw_prediction
|
| 74 |
+
+ exp(raw_prediction) - y_true
|
| 75 |
+
|
| 76 |
+
Dropping constant terms, this gives:
|
| 77 |
+
exp(raw_prediction) - y_true * raw_prediction
|
| 78 |
+
"""
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
doc_HalfGammaLoss = (
|
| 82 |
+
"""Half Gamma deviance loss with log-link.
|
| 83 |
+
|
| 84 |
+
Domain:
|
| 85 |
+
y_true and y_pred in positive real numbers
|
| 86 |
+
|
| 87 |
+
Link:
|
| 88 |
+
y_pred = exp(raw_prediction)
|
| 89 |
+
|
| 90 |
+
Half Gamma deviance with log-link is
|
| 91 |
+
log(y_pred/y_true) + y_true/y_pred - 1
|
| 92 |
+
= raw_prediction - log(y_true) + y_true * exp(-raw_prediction) - 1
|
| 93 |
+
|
| 94 |
+
Dropping constant terms, this gives:
|
| 95 |
+
raw_prediction + y_true * exp(-raw_prediction)
|
| 96 |
+
"""
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
doc_HalfTweedieLoss = (
|
| 100 |
+
"""Half Tweedie deviance loss with log-link.
|
| 101 |
+
|
| 102 |
+
Domain:
|
| 103 |
+
y_true in real numbers if p <= 0
|
| 104 |
+
y_true in non-negative real numbers if 0 < p < 2
|
| 105 |
+
y_true in positive real numbers if p >= 2
|
| 106 |
+
y_pred and power in positive real numbers
|
| 107 |
+
|
| 108 |
+
Link:
|
| 109 |
+
y_pred = exp(raw_prediction)
|
| 110 |
+
|
| 111 |
+
Half Tweedie deviance with log-link and p=power is
|
| 112 |
+
max(y_true, 0)**(2-p) / (1-p) / (2-p)
|
| 113 |
+
- y_true * y_pred**(1-p) / (1-p)
|
| 114 |
+
+ y_pred**(2-p) / (2-p)
|
| 115 |
+
= max(y_true, 0)**(2-p) / (1-p) / (2-p)
|
| 116 |
+
- y_true * exp((1-p) * raw_prediction) / (1-p)
|
| 117 |
+
+ exp((2-p) * raw_prediction) / (2-p)
|
| 118 |
+
|
| 119 |
+
Dropping constant terms, this gives:
|
| 120 |
+
exp((2-p) * raw_prediction) / (2-p)
|
| 121 |
+
- y_true * exp((1-p) * raw_prediction) / (1-p)
|
| 122 |
+
|
| 123 |
+
Notes:
|
| 124 |
+
- Poisson with p=1 and Gamma with p=2 have different terms dropped such
|
| 125 |
+
that cHalfTweedieLoss is not continuous in p=power at p=1 and p=2.
|
| 126 |
+
- While the Tweedie distribution only exists for p<=0 or p>=1, the range
|
| 127 |
+
0<p<1 still gives a strictly consistent scoring function for the
|
| 128 |
+
expectation.
|
| 129 |
+
"""
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
doc_HalfTweedieLossIdentity = (
|
| 133 |
+
"""Half Tweedie deviance loss with identity link.
|
| 134 |
+
|
| 135 |
+
Domain:
|
| 136 |
+
y_true in real numbers if p <= 0
|
| 137 |
+
y_true in non-negative real numbers if 0 < p < 2
|
| 138 |
+
y_true in positive real numbers if p >= 2
|
| 139 |
+
y_pred and power in positive real numbers, y_pred may be negative for p=0.
|
| 140 |
+
|
| 141 |
+
Link:
|
| 142 |
+
y_pred = raw_prediction
|
| 143 |
+
|
| 144 |
+
Half Tweedie deviance with identity link and p=power is
|
| 145 |
+
max(y_true, 0)**(2-p) / (1-p) / (2-p)
|
| 146 |
+
- y_true * y_pred**(1-p) / (1-p)
|
| 147 |
+
+ y_pred**(2-p) / (2-p)
|
| 148 |
+
|
| 149 |
+
Notes:
|
| 150 |
+
- Here, we do not drop constant terms in contrast to the version with log-link.
|
| 151 |
+
"""
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
doc_HalfBinomialLoss = (
|
| 155 |
+
"""Half Binomial deviance loss with logit link.
|
| 156 |
+
|
| 157 |
+
Domain:
|
| 158 |
+
y_true in [0, 1]
|
| 159 |
+
y_pred in (0, 1), i.e. boundaries excluded
|
| 160 |
+
|
| 161 |
+
Link:
|
| 162 |
+
y_pred = expit(raw_prediction)
|
| 163 |
+
"""
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
+
# Fix: the original used four quotes (`""""Exponential ...`), which left a
# stray leading `"` in the generated class docstring.
doc_ExponentialLoss = (
    """Exponential loss with (half) logit link.

    Domain:
    y_true in [0, 1]
    y_pred in (0, 1), i.e. boundaries excluded

    Link:
    y_pred = expit(2 * raw_prediction)
    """
)
|
| 177 |
+
|
| 178 |
+
# loss class name, docstring, param,
|
| 179 |
+
# cy_loss, cy_loss_grad,
|
| 180 |
+
# cy_grad, cy_grad_hess,
|
| 181 |
+
class_list = [
|
| 182 |
+
("CyHalfSquaredError", doc_HalfSquaredError, None,
|
| 183 |
+
"closs_half_squared_error", None,
|
| 184 |
+
"cgradient_half_squared_error", "cgrad_hess_half_squared_error"),
|
| 185 |
+
("CyAbsoluteError", doc_AbsoluteError, None,
|
| 186 |
+
"closs_absolute_error", None,
|
| 187 |
+
"cgradient_absolute_error", "cgrad_hess_absolute_error"),
|
| 188 |
+
("CyPinballLoss", doc_PinballLoss, "quantile",
|
| 189 |
+
"closs_pinball_loss", None,
|
| 190 |
+
"cgradient_pinball_loss", "cgrad_hess_pinball_loss"),
|
| 191 |
+
("CyHuberLoss", doc_HuberLoss, "delta",
|
| 192 |
+
"closs_huber_loss", None,
|
| 193 |
+
"cgradient_huber_loss", "cgrad_hess_huber_loss"),
|
| 194 |
+
("CyHalfPoissonLoss", doc_HalfPoissonLoss, None,
|
| 195 |
+
"closs_half_poisson", "closs_grad_half_poisson",
|
| 196 |
+
"cgradient_half_poisson", "cgrad_hess_half_poisson"),
|
| 197 |
+
("CyHalfGammaLoss", doc_HalfGammaLoss, None,
|
| 198 |
+
"closs_half_gamma", "closs_grad_half_gamma",
|
| 199 |
+
"cgradient_half_gamma", "cgrad_hess_half_gamma"),
|
| 200 |
+
("CyHalfTweedieLoss", doc_HalfTweedieLoss, "power",
|
| 201 |
+
"closs_half_tweedie", "closs_grad_half_tweedie",
|
| 202 |
+
"cgradient_half_tweedie", "cgrad_hess_half_tweedie"),
|
| 203 |
+
("CyHalfTweedieLossIdentity", doc_HalfTweedieLossIdentity, "power",
|
| 204 |
+
"closs_half_tweedie_identity", "closs_grad_half_tweedie_identity",
|
| 205 |
+
"cgradient_half_tweedie_identity", "cgrad_hess_half_tweedie_identity"),
|
| 206 |
+
("CyHalfBinomialLoss", doc_HalfBinomialLoss, None,
|
| 207 |
+
"closs_half_binomial", "closs_grad_half_binomial",
|
| 208 |
+
"cgradient_half_binomial", "cgrad_hess_half_binomial"),
|
| 209 |
+
("CyExponentialLoss", doc_ExponentialLoss, None,
|
| 210 |
+
"closs_exponential", "closs_grad_exponential",
|
| 211 |
+
"cgradient_exponential", "cgrad_hess_exponential"),
|
| 212 |
+
]
|
| 213 |
+
}}
|
| 214 |
+
|
| 215 |
+
# Design:
|
| 216 |
+
# See https://github.com/scikit-learn/scikit-learn/issues/15123 for reasons.
|
| 217 |
+
# a) Merge link functions into loss functions for speed and numerical
|
| 218 |
+
# stability, i.e. use raw_prediction instead of y_pred in signature.
|
| 219 |
+
# b) Pure C functions (nogil) calculate single points (single sample)
|
| 220 |
+
# c) Wrap C functions in a loop to get Python functions operating on ndarrays.
|
| 221 |
+
# - Write loops manually---use Tempita for this.
|
| 222 |
+
# Reason: There is still some performance overhead when using a wrapper
|
| 223 |
+
# function "wrap" that carries out the loop and gets as argument a function
|
| 224 |
+
# pointer to one of the C functions from b), e.g.
|
| 225 |
+
# wrap(closs_half_poisson, y_true, ...)
|
| 226 |
+
# - Pass n_threads as argument to prange and propagate option to all callers.
|
| 227 |
+
# d) Provide classes (Cython extension types) per loss (names start with Cy) in
|
| 228 |
+
# order to have semantical structured objects.
|
| 229 |
+
# - Member functions for single points just call the C function from b).
|
| 230 |
+
# These are used e.g. in SGD `_plain_sgd`.
|
| 231 |
+
# - Member functions operating on ndarrays, see c), looping over calls to C
|
| 232 |
+
# functions from b).
|
| 233 |
+
# e) Provide convenience Python classes that compose from these extension types
|
| 234 |
+
# elsewhere (see loss.py)
|
| 235 |
+
# - Example: loss.gradient calls CyLoss.gradient but does some input
|
| 236 |
+
# checking like None -> np.empty().
|
| 237 |
+
#
|
| 238 |
+
# Note: We require 1-dim ndarrays to be contiguous.
|
| 239 |
+
|
| 240 |
+
from cython.parallel import parallel, prange
|
| 241 |
+
import numpy as np
|
| 242 |
+
|
| 243 |
+
from libc.math cimport exp, fabs, log, log1p, pow
|
| 244 |
+
from libc.stdlib cimport malloc, free
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# -------------------------------------
|
| 248 |
+
# Helper functions
|
| 249 |
+
# -------------------------------------
|
| 250 |
+
# Numerically stable version of log(1 + exp(x)) for double precision, see Eq. (10) of
|
| 251 |
+
# https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf
|
| 252 |
+
# Note: The only important cutoff is at x = 18. All others are to save computation
|
| 253 |
+
# time. Compared to the reference, we add the additional case distinction x <= -2 in
|
| 254 |
+
# order to use log instead of log1p for improved performance. As with the other
|
| 255 |
+
# cutoffs, this is accurate within machine precision of double.
|
| 256 |
+
cdef inline double log1pexp(double x) noexcept nogil:
    # Stable evaluation of log(1 + exp(x)); cutoffs follow the comment block
    # above this function (Eq. (10) of the Rmpfr log1mexp note).
    if x <= -37:
        # exp(x) is so small that log1p(exp(x)) rounds to exp(x) in double.
        return exp(x)
    elif x <= -2:
        # exp(x) is small: log1p retains full accuracy here.
        return log1p(exp(x))
    elif x <= 18:
        # General regime: evaluate directly.
        return log(1. + exp(x))
    elif x <= 33.3:
        # x dominates; exp(-x) is a tiny additive correction.
        return x + exp(-x)
    else:
        # exp(-x) underflows relative to x within double precision.
        return x
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
cdef inline double_pair sum_exp_minus_max(
    const int i,
    const floating_in[:, :] raw_prediction,  # IN
    floating_out *p                          # OUT
) noexcept nogil:
    # Thread local buffers are used to store part of the results via p.
    # The results are stored as follows:
    #     p[k] = exp(raw_prediction_i_k - max_value) for k = 0 to n_classes-1
    #     return.val1 = max_value = max(raw_prediction_i_k, k = 0 to n_classes-1)
    #     return.val2 = sum_exps = sum(p[k], k = 0 to n_classes-1) = sum of exponentials
    # len(p) must be n_classes
    # Notes:
    # - We return the max value and sum of exps (stored in p) as a double_pair.
    # - i needs to be passed (and stays constant) because otherwise Cython does
    #   not generate optimal code, see
    #   https://github.com/scikit-learn/scikit-learn/issues/17299
    # - We do not normalize p by calculating p[k] = p[k] / sum_exps.
    #   This helps to save one loop over k.
    cdef:
        int k
        int n_classes = raw_prediction.shape[1]
        double_pair max_value_and_sum_exps  # val1 = max_value, val2 = sum_exps

    # First pass: row maximum (initialized from column 0, scanned from 1).
    max_value_and_sum_exps.val1 = raw_prediction[i, 0]
    max_value_and_sum_exps.val2 = 0
    for k in range(1, n_classes):
        # Compute max value of array for numerical stability
        if max_value_and_sum_exps.val1 < raw_prediction[i, k]:
            max_value_and_sum_exps.val1 = raw_prediction[i, k]

    # Second pass: exponentials shifted by the row max (avoids overflow),
    # accumulated into the sum.
    for k in range(n_classes):
        p[k] = exp(raw_prediction[i, k] - max_value_and_sum_exps.val1)
        max_value_and_sum_exps.val2 += p[k]

    return max_value_and_sum_exps
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
# -------------------------------------
|
| 307 |
+
# Single point inline C functions
|
| 308 |
+
# -------------------------------------
|
| 309 |
+
# Half Squared Error
|
| 310 |
+
cdef inline double closs_half_squared_error(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Half squared error: 0.5 * residual^2, with an explicit residual variable.
    cdef double residual = raw_prediction - y_true
    return 0.5 * residual * residual
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
cdef inline double cgradient_half_squared_error(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # d/d(raw_prediction) of 0.5 * (raw_prediction - y_true)^2 is the residual.
    cdef double grad = raw_prediction - y_true
    return grad
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
cdef inline double_pair cgrad_hess_half_squared_error(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Gradient is the residual; the hessian of 0.5 * residual^2 is constant 1.
    cdef double_pair result
    result.val1 = raw_prediction - y_true  # gradient
    result.val2 = 1.                       # hessian
    return result
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# Absolute Error
|
| 335 |
+
cdef inline double closs_absolute_error(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Absolute error |raw_prediction - y_true|, via an explicit residual.
    cdef double residual = raw_prediction - y_true
    return fabs(residual)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
cdef inline double cgradient_absolute_error(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Subgradient of |raw_prediction - y_true|: sign of the residual,
    # with the tie (residual == 0) resolved to -1 as in the original.
    if raw_prediction > y_true:
        return 1.
    return -1.
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
cdef inline double_pair cgrad_hess_absolute_error(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Note that exact hessian = 0 almost everywhere. Optimization routines like
    # in HGBT, however, need a hessian > 0. Therefore, we assign 1.
    cdef double_pair result
    if raw_prediction > y_true:
        result.val1 = 1.   # gradient
    else:
        result.val1 = -1.  # gradient
    result.val2 = 1.       # hessian
    return result
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
# Quantile Loss / Pinball Loss
|
| 362 |
+
cdef inline double closs_pinball_loss(
    double y_true,
    double raw_prediction,
    double quantile
) noexcept nogil:
    # Pinball loss: under-prediction is weighted by `quantile`,
    # over-prediction by `1 - quantile`.
    if y_true >= raw_prediction:
        return quantile * (y_true - raw_prediction)
    return (1. - quantile) * (raw_prediction - y_true)
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
cdef inline double cgradient_pinball_loss(
|
| 372 |
+
double y_true,
|
| 373 |
+
double raw_prediction,
|
| 374 |
+
double quantile
|
| 375 |
+
) noexcept nogil:
|
| 376 |
+
return -quantile if y_true >=raw_prediction else 1. - quantile
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
cdef inline double_pair cgrad_hess_pinball_loss(
|
| 380 |
+
double y_true,
|
| 381 |
+
double raw_prediction,
|
| 382 |
+
double quantile
|
| 383 |
+
) noexcept nogil:
|
| 384 |
+
cdef double_pair gh
|
| 385 |
+
# Note that exact hessian = 0 almost everywhere. Optimization routines like
|
| 386 |
+
# in HGBT, however, need a hessian > 0. Therefore, we assign 1.
|
| 387 |
+
gh.val1 = -quantile if y_true >=raw_prediction else 1. - quantile # gradient
|
| 388 |
+
gh.val2 = 1. # hessian
|
| 389 |
+
return gh
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
# Huber Loss
cdef inline double closs_huber_loss(
    double y_true,
    double raw_prediction,
    double delta,
) noexcept nogil:
    # Quadratic for |residual| <= delta, linear beyond; value and first
    # derivative are continuous at |residual| == delta.
    cdef double abserr = fabs(y_true - raw_prediction)
    if abserr <= delta:
        return 0.5 * abserr**2
    else:
        return delta * (abserr - 0.5 * delta)


cdef inline double cgradient_huber_loss(
    double y_true,
    double raw_prediction,
    double delta,
) noexcept nogil:
    # Residual in the quadratic region, clipped to +-delta outside it.
    cdef double res = raw_prediction - y_true
    if fabs(res) <= delta:
        return res
    else:
        return delta if res >=0 else -delta


cdef inline double_pair cgrad_hess_huber_loss(
    double y_true,
    double raw_prediction,
    double delta,
) noexcept nogil:
    cdef double_pair gh
    # gh.val2 first holds the residual, then is overwritten by the hessian.
    gh.val2 = raw_prediction - y_true  # used as temporary
    if fabs(gh.val2) <= delta:
        gh.val1 = gh.val2  # gradient
        gh.val2 = 1        # hessian
    else:
        gh.val1 = delta if gh.val2 >=0 else -delta  # gradient
        gh.val2 = 0        # hessian (exact hessian is 0 in the linear region)
    return gh
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
# Half Poisson Deviance with Log-Link, dropping constant terms
cdef inline double closs_half_poisson(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # With y_pred = exp(raw_prediction); terms constant in raw_prediction
    # are dropped, so the minimum value is not zero.
    return exp(raw_prediction) - y_true * raw_prediction


cdef inline double cgradient_half_poisson(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # y_pred - y_true
    return exp(raw_prediction) - y_true


cdef inline double_pair closs_grad_half_poisson(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Fused loss + gradient, sharing the single exp() evaluation.
    cdef double_pair lg
    lg.val2 = exp(raw_prediction)  # used as temporary
    lg.val1 = lg.val2 - y_true * raw_prediction  # loss
    lg.val2 -= y_true  # gradient
    return lg


cdef inline double_pair cgrad_hess_half_poisson(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Hessian w.r.t. raw_prediction equals exp(raw_prediction).
    cdef double_pair gh
    gh.val2 = exp(raw_prediction)  # hessian
    gh.val1 = gh.val2 - y_true  # gradient
    return gh
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
# Half Gamma Deviance with Log-Link, dropping constant terms
cdef inline double closs_half_gamma(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # With y_pred = exp(raw_prediction); constant terms are dropped.
    return raw_prediction + y_true * exp(-raw_prediction)


cdef inline double cgradient_half_gamma(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # 1 - y_true / y_pred with y_pred = exp(raw_prediction).
    return 1. - y_true * exp(-raw_prediction)


cdef inline double_pair closs_grad_half_gamma(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Fused loss + gradient, sharing the single exp() evaluation.
    cdef double_pair lg
    lg.val2 = exp(-raw_prediction)  # used as temporary
    lg.val1 = raw_prediction + y_true * lg.val2  # loss
    lg.val2 = 1. - y_true * lg.val2  # gradient
    return lg


cdef inline double_pair cgrad_hess_half_gamma(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Gradient and hessian share the single exp() evaluation; the hessian is
    # y_true * exp(-raw_prediction).
    cdef double_pair gh
    gh.val2 = exp(-raw_prediction)  # used as temporary
    gh.val1 = 1. - y_true * gh.val2  # gradient
    gh.val2 *= y_true  # hessian
    return gh
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
# Half Tweedie Deviance with Log-Link, dropping constant terms
# Note that by dropping constants this is no longer continuous in parameter power.
cdef inline double closs_half_tweedie(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    # The generic formula divides by (2 - power) and (1 - power); the powers
    # 0, 1 and 2 are therefore special-cased via the dedicated losses.
    if power == 0.:
        return closs_half_squared_error(y_true, exp(raw_prediction))
    elif power == 1.:
        return closs_half_poisson(y_true, raw_prediction)
    elif power == 2.:
        return closs_half_gamma(y_true, raw_prediction)
    else:
        return (exp((2. - power) * raw_prediction) / (2. - power)
                - y_true * exp((1. - power) * raw_prediction) / (1. - power))


cdef inline double cgradient_half_tweedie(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    cdef double exp1
    # Same special-casing of power as in closs_half_tweedie.
    if power == 0.:
        exp1 = exp(raw_prediction)
        return exp1 * (exp1 - y_true)
    elif power == 1.:
        return cgradient_half_poisson(y_true, raw_prediction)
    elif power == 2.:
        return cgradient_half_gamma(y_true, raw_prediction)
    else:
        return (exp((2. - power) * raw_prediction)
                - y_true * exp((1. - power) * raw_prediction))


cdef inline double_pair closs_grad_half_tweedie(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    # Fused loss + gradient; shares the two exp() evaluations in the
    # generic branch.
    cdef double_pair lg
    cdef double exp1, exp2
    if power == 0.:
        exp1 = exp(raw_prediction)
        lg.val1 = closs_half_squared_error(y_true, exp1)  # loss
        lg.val2 = exp1 * (exp1 - y_true)  # gradient
    elif power == 1.:
        return closs_grad_half_poisson(y_true, raw_prediction)
    elif power == 2.:
        return closs_grad_half_gamma(y_true, raw_prediction)
    else:
        exp1 = exp((1. - power) * raw_prediction)
        exp2 = exp((2. - power) * raw_prediction)
        lg.val1 = exp2 / (2. - power) - y_true * exp1 / (1. - power)  # loss
        lg.val2 = exp2 - y_true * exp1  # gradient
    return lg


cdef inline double_pair cgrad_hess_half_tweedie(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    # Fused gradient + hessian; shares the two exp() evaluations in the
    # generic branch.
    cdef double_pair gh
    cdef double exp1, exp2
    if power == 0.:
        exp1 = exp(raw_prediction)
        gh.val1 = exp1 * (exp1 - y_true)  # gradient
        gh.val2 = exp1 * (2 * exp1 - y_true)  # hessian
    elif power == 1.:
        return cgrad_hess_half_poisson(y_true, raw_prediction)
    elif power == 2.:
        return cgrad_hess_half_gamma(y_true, raw_prediction)
    else:
        exp1 = exp((1. - power) * raw_prediction)
        exp2 = exp((2. - power) * raw_prediction)
        gh.val1 = exp2 - y_true * exp1  # gradient
        gh.val2 = (2. - power) * exp2 - (1. - power) * y_true * exp1  # hessian
    return gh
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
# Half Tweedie Deviance with identity link, without dropping constant terms!
# Therefore, best loss value is zero.
cdef inline double closs_half_tweedie_identity(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    cdef double tmp
    # Powers 0, 1, 2 are special-cased to avoid the poles of the generic
    # formula (division by 1 - power and 2 - power).
    if power == 0.:
        return closs_half_squared_error(y_true, raw_prediction)
    elif power == 1.:
        if y_true == 0:
            # The y_true * log(y_true / raw) term vanishes for y_true == 0.
            return raw_prediction
        else:
            return y_true * log(y_true/raw_prediction) + raw_prediction - y_true
    elif power == 2.:
        return log(raw_prediction/y_true) + y_true/raw_prediction - 1.
    else:
        tmp = pow(raw_prediction, 1. - power)
        tmp = raw_prediction * tmp / (2. - power) - y_true * tmp / (1. - power)
        if y_true > 0:
            # Constant (in raw_prediction) term kept so the minimum is zero.
            tmp += pow(y_true, 2. - power) / ((1. - power) * (2. - power))
        return tmp


cdef inline double cgradient_half_tweedie_identity(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    # Same special-casing of power as in closs_half_tweedie_identity.
    if power == 0.:
        return raw_prediction - y_true
    elif power == 1.:
        return 1. - y_true / raw_prediction
    elif power == 2.:
        return (raw_prediction - y_true) / (raw_prediction * raw_prediction)
    else:
        return pow(raw_prediction, -power) * (raw_prediction - y_true)


cdef inline double_pair closs_grad_half_tweedie_identity(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    # Fused loss + gradient; shares the pow() evaluation in the generic branch.
    cdef double_pair lg
    cdef double tmp
    if power == 0.:
        lg.val2 = raw_prediction - y_true  # gradient
        lg.val1 = 0.5 * lg.val2 * lg.val2  # loss
    elif power == 1.:
        if y_true == 0:
            lg.val1 = raw_prediction
        else:
            lg.val1 = (y_true * log(y_true/raw_prediction)  # loss
                       + raw_prediction - y_true)
        lg.val2 = 1. - y_true / raw_prediction  # gradient
    elif power == 2.:
        lg.val1 = log(raw_prediction/y_true) + y_true/raw_prediction - 1.  # loss
        tmp = raw_prediction * raw_prediction
        lg.val2 = (raw_prediction - y_true) / tmp  # gradient
    else:
        tmp = pow(raw_prediction, 1. - power)
        lg.val1 = (raw_prediction * tmp / (2. - power)  # loss
                   - y_true * tmp / (1. - power))
        if y_true > 0:
            lg.val1 += (pow(y_true, 2. - power)
                        / ((1. - power) * (2. - power)))
        lg.val2 = tmp * (1. - y_true / raw_prediction)  # gradient
    return lg


cdef inline double_pair cgrad_hess_half_tweedie_identity(
    double y_true,
    double raw_prediction,
    double power
) noexcept nogil:
    # Fused gradient + hessian; shares the pow() evaluation in the
    # generic branch.
    cdef double_pair gh
    cdef double tmp
    if power == 0.:
        gh.val1 = raw_prediction - y_true  # gradient
        gh.val2 = 1.  # hessian
    elif power == 1.:
        gh.val1 = 1. - y_true / raw_prediction  # gradient
        gh.val2 = y_true / (raw_prediction * raw_prediction)  # hessian
    elif power == 2.:
        tmp = raw_prediction * raw_prediction
        gh.val1 = (raw_prediction - y_true) / tmp  # gradient
        gh.val2 = (-1. + 2. * y_true / raw_prediction) / tmp  # hessian
    else:
        tmp = pow(raw_prediction, -power)
        gh.val1 = tmp * (raw_prediction - y_true)  # gradient
        gh.val2 = tmp * ((1. - power) + power * y_true / raw_prediction)  # hessian
    return gh
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
# Half Binomial deviance with logit-link, aka log-loss or binary cross entropy
cdef inline double closs_half_binomial(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # log1p(exp(raw_prediction)) - y_true * raw_prediction
    return log1pexp(raw_prediction) - y_true * raw_prediction


cdef inline double cgradient_half_binomial(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # gradient = y_pred - y_true = expit(raw_prediction) - y_true
    # Numerically more stable, see http://fa.bianp.net/blog/2019/evaluate_logistic/
    #   if raw_prediction < 0:
    #       exp_tmp = exp(raw_prediction)
    #       return ((1 - y_true) * exp_tmp - y_true) / (1 + exp_tmp)
    #   else:
    #       exp_tmp = exp(-raw_prediction)
    #       return ((1 - y_true) - y_true * exp_tmp) / (1 + exp_tmp)
    # Note that optimal speed would be achieved, at the cost of precision, by
    #   return expit(raw_prediction) - y_true
    # i.e. no "if else" and an own inline implementation of expit instead of
    #   from scipy.special.cython_special cimport expit
    # The case distinction raw_prediction < 0 in the stable implementation does not
    # provide significant better precision apart from protecting overflow of exp(..).
    # The branch (if else), however, can incur runtime costs of up to 30%.
    # Instead, we help branch prediction by almost always ending in the first if clause
    # and making the second branch (else) a bit simpler. This has the exact same
    # precision but is faster than the stable implementation.
    # As branching criteria, we use the same cutoff as in log1pexp. Note that the
    # maximal value to get gradient = -1 with y_true = 1 is -37.439198610162731
    # (based on mpmath), and scipy.special.logit(np.finfo(float).eps) ~ -36.04365.
    cdef double exp_tmp
    if raw_prediction > -37:
        exp_tmp = exp(-raw_prediction)
        return ((1 - y_true) - y_true * exp_tmp) / (1 + exp_tmp)
    else:
        # expit(raw_prediction) = exp(raw_prediction) for raw_prediction <= -37
        return exp(raw_prediction) - y_true


cdef inline double_pair closs_grad_half_binomial(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Fused loss + gradient with the precision-tuned branch cutoffs of
    # log1pexp (-37, -2, 18); lg.val2 holds exp(+-raw) as a temporary before
    # being overwritten by the gradient.
    cdef double_pair lg
    # Same if else conditions as in log1pexp.
    if raw_prediction <= -37:
        lg.val2 = exp(raw_prediction)  # used as temporary
        lg.val1 = lg.val2 - y_true * raw_prediction  # loss
        lg.val2 -= y_true  # gradient
    elif raw_prediction <= -2:
        lg.val2 = exp(raw_prediction)  # used as temporary
        lg.val1 = log1p(lg.val2) - y_true * raw_prediction  # loss
        lg.val2 = ((1 - y_true) * lg.val2 - y_true) / (1 + lg.val2)  # gradient
    elif raw_prediction <= 18:
        lg.val2 = exp(-raw_prediction)  # used as temporary
        # log1p(exp(x)) = log(1 + exp(x)) = x + log1p(exp(-x))
        lg.val1 = log1p(lg.val2) + (1 - y_true) * raw_prediction  # loss
        lg.val2 = ((1 - y_true) - y_true * lg.val2) / (1 + lg.val2)  # gradient
    else:
        lg.val2 = exp(-raw_prediction)  # used as temporary
        lg.val1 = lg.val2 + (1 - y_true) * raw_prediction  # loss
        lg.val2 = ((1 - y_true) - y_true * lg.val2) / (1 + lg.val2)  # gradient
    return lg


cdef inline double_pair cgrad_hess_half_binomial(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # with y_pred = expit(raw)
    # hessian = y_pred * (1 - y_pred) = exp( raw) / (1 + exp( raw))**2
    #                                 = exp(-raw) / (1 + exp(-raw))**2
    cdef double_pair gh
    # See comment in cgradient_half_binomial.
    if raw_prediction > -37:
        gh.val2 = exp(-raw_prediction)  # used as temporary
        gh.val1 = ((1 - y_true) - y_true * gh.val2) / (1 + gh.val2)  # gradient
        gh.val2 = gh.val2 / (1 + gh.val2)**2  # hessian
    else:
        gh.val2 = exp(raw_prediction)  # = 1. order Taylor in exp(raw_prediction)
        gh.val1 = gh.val2 - y_true
    return gh
|
| 771 |
+
|
| 772 |
+
|
| 773 |
+
# Exponential loss with (half) logit-link, aka boosting loss
cdef inline double closs_exponential(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # loss = y_true * exp(-raw_prediction) + (1 - y_true) * exp(raw_prediction)
    cdef double tmp = exp(raw_prediction)
    return y_true / tmp + (1 - y_true) * tmp


cdef inline double cgradient_exponential(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Derivative of closs_exponential w.r.t. raw_prediction.
    cdef double tmp = exp(raw_prediction)
    return -y_true / tmp + (1 - y_true) * tmp


cdef inline double_pair closs_grad_exponential(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Fused loss + gradient; lg.val2 holds exp(raw) until the gradient
    # assignment overwrites it (RHS is evaluated first).
    cdef double_pair lg
    lg.val2 = exp(raw_prediction)  # used as temporary

    lg.val1 = y_true / lg.val2 + (1 - y_true) * lg.val2  # loss
    lg.val2 = -y_true / lg.val2 + (1 - y_true) * lg.val2  # gradient
    return lg


cdef inline double_pair cgrad_hess_exponential(
    double y_true,
    double raw_prediction
) noexcept nogil:
    # Note that hessian = loss
    cdef double_pair gh
    gh.val2 = exp(raw_prediction)  # used as temporary

    gh.val1 = -y_true / gh.val2 + (1 - y_true) * gh.val2  # gradient
    gh.val2 = y_true / gh.val2 + (1 - y_true) * gh.val2  # hessian
    return gh
|
| 813 |
+
|
| 814 |
+
|
| 815 |
+
# ---------------------------------------------------
# Extension Types for Loss Functions of 1-dim targets
# ---------------------------------------------------
cdef class CyLossFunction:
    """Base class for convex loss functions."""

    def __reduce__(self):
        # Stateless by default: pickling reconstructs the class with no args.
        # Parametrized subclasses override this.
        return (self.__class__, ())

    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil:
        """Compute the loss for a single sample.

        Parameters
        ----------
        y_true : double
            Observed, true target value.
        raw_prediction : double
            Raw prediction value (in link space).

        Returns
        -------
        double
            The loss evaluated at `y_true` and `raw_prediction`.
        """
        pass

    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil:
        """Compute gradient of loss w.r.t. raw_prediction for a single sample.

        Parameters
        ----------
        y_true : double
            Observed, true target value.
        raw_prediction : double
            Raw prediction value (in link space).

        Returns
        -------
        double
            The derivative of the loss function w.r.t. `raw_prediction`.
        """
        pass

    cdef double_pair cy_grad_hess(
        self, double y_true, double raw_prediction
    ) noexcept nogil:
        """Compute gradient and hessian.

        Gradient and hessian of loss w.r.t. raw_prediction for a single sample.

        This is usually diagonal in raw_prediction_i and raw_prediction_j.
        Therefore, we return the diagonal element i=j.

        For a loss with a non-canonical link, this might implement the diagonal
        of the Fisher matrix (=expected hessian) instead of the hessian.

        Parameters
        ----------
        y_true : double
            Observed, true target value.
        raw_prediction : double
            Raw prediction value (in link space).

        Returns
        -------
        double_pair
            Gradient and hessian of the loss function w.r.t. `raw_prediction`.
        """
        pass

    def loss(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] loss_out,             # OUT
        int n_threads=1
    ):
        """Compute the point-wise loss value for each input.

        The point-wise loss is written to `loss_out` and no array is returned.

        Parameters
        ----------
        y_true : array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples,)
            Raw prediction values (in link space).
        sample_weight : array of shape (n_samples,) or None
            Sample weights.
        loss_out : array of shape (n_samples,)
            A location into which the result is stored.
        n_threads : int
            Number of threads used by OpenMP (if any).
        """
        pass

    def gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] gradient_out,         # OUT
        int n_threads=1
    ):
        """Compute gradient of loss w.r.t raw_prediction for each input.

        The gradient is written to `gradient_out` and no array is returned.

        Parameters
        ----------
        y_true : array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples,)
            Raw prediction values (in link space).
        sample_weight : array of shape (n_samples,) or None
            Sample weights.
        gradient_out : array of shape (n_samples,)
            A location into which the result is stored.
        n_threads : int
            Number of threads used by OpenMP (if any).
        """
        pass

    def loss_gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] loss_out,             # OUT
        floating_out[::1] gradient_out,         # OUT
        int n_threads=1
    ):
        """Compute loss and gradient of loss w.r.t raw_prediction.

        The loss and gradient are written to `loss_out` and `gradient_out` and no arrays
        are returned.

        Parameters
        ----------
        y_true : array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples,)
            Raw prediction values (in link space).
        sample_weight : array of shape (n_samples,) or None
            Sample weights.
        loss_out : array of shape (n_samples,) or None
            A location into which the element-wise loss is stored.
        gradient_out : array of shape (n_samples,)
            A location into which the gradient is stored.
        n_threads : int
            Number of threads used by OpenMP (if any).
        """
        # Default implementation: two separate passes. Generated subclasses
        # may override this with a fused single-pass version.
        self.loss(y_true, raw_prediction, sample_weight, loss_out, n_threads)
        self.gradient(y_true, raw_prediction, sample_weight, gradient_out, n_threads)

    def gradient_hessian(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] gradient_out,         # OUT
        floating_out[::1] hessian_out,          # OUT
        int n_threads=1
    ):
        """Compute gradient and hessian of loss w.r.t raw_prediction.

        The gradient and hessian are written to `gradient_out` and `hessian_out` and no
        arrays are returned.

        Parameters
        ----------
        y_true : array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples,)
            Raw prediction values (in link space).
        sample_weight : array of shape (n_samples,) or None
            Sample weights.
        gradient_out : array of shape (n_samples,)
            A location into which the gradient is stored.
        hessian_out : array of shape (n_samples,)
            A location into which the hessian is stored.
        n_threads : int
            Number of threads used by OpenMP (if any).
        """
        pass
|
| 1001 |
+
|
| 1002 |
+
|
| 1003 |
+
{{for name, docstring, param, closs, closs_grad, cgrad, cgrad_hess, in class_list}}
{{py:
if param is None:
    with_param = ""
else:
    with_param = ", self." + param
}}

# One concrete extension type per entry of `class_list`; `param` (e.g. a
# quantile or power) is appended to every kernel call via `with_param`.
cdef class {{name}}(CyLossFunction):
    """{{docstring}}"""

    {{if param is not None}}
    def __init__(self, {{param}}):
        self.{{param}} = {{param}}
    {{endif}}

    {{if param is not None}}
    def __reduce__(self):
        # Preserve the loss parameter on pickling.
        return (self.__class__, (self.{{param}},))
    {{endif}}

    cdef inline double cy_loss(self, double y_true, double raw_prediction) noexcept nogil:
        return {{closs}}(y_true, raw_prediction{{with_param}})

    cdef inline double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil:
        return {{cgrad}}(y_true, raw_prediction{{with_param}})

    cdef inline double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil:
        return {{cgrad_hess}}(y_true, raw_prediction{{with_param}})

    def loss(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] loss_out,             # OUT
        int n_threads=1
    ):
        cdef:
            int i
            int n_samples = y_true.shape[0]

        # Weighted and unweighted loops are kept separate so the unweighted
        # hot path avoids the per-sample multiplication.
        if sample_weight is None:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                loss_out[i] = {{closs}}(y_true[i], raw_prediction[i]{{with_param}})
        else:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                loss_out[i] = sample_weight[i] * {{closs}}(y_true[i], raw_prediction[i]{{with_param}})

    {{if closs_grad is not None}}
    def loss_gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] loss_out,             # OUT
        floating_out[::1] gradient_out,         # OUT
        int n_threads=1
    ):
        cdef:
            int i
            int n_samples = y_true.shape[0]
            double_pair dbl2

        if sample_weight is None:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                dbl2 = {{closs_grad}}(y_true[i], raw_prediction[i]{{with_param}})
                loss_out[i] = dbl2.val1
                gradient_out[i] = dbl2.val2
        else:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                dbl2 = {{closs_grad}}(y_true[i], raw_prediction[i]{{with_param}})
                loss_out[i] = sample_weight[i] * dbl2.val1
                gradient_out[i] = sample_weight[i] * dbl2.val2

    {{endif}}

    def gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] gradient_out,         # OUT
        int n_threads=1
    ):
        cdef:
            int i
            int n_samples = y_true.shape[0]

        if sample_weight is None:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                gradient_out[i] = {{cgrad}}(y_true[i], raw_prediction[i]{{with_param}})
        else:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                gradient_out[i] = sample_weight[i] * {{cgrad}}(y_true[i], raw_prediction[i]{{with_param}})

    def gradient_hessian(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] gradient_out,         # OUT
        floating_out[::1] hessian_out,          # OUT
        int n_threads=1
    ):
        cdef:
            int i
            int n_samples = y_true.shape[0]
            double_pair dbl2

        if sample_weight is None:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                dbl2 = {{cgrad_hess}}(y_true[i], raw_prediction[i]{{with_param}})
                gradient_out[i] = dbl2.val1
                hessian_out[i] = dbl2.val2
        else:
            for i in prange(
                n_samples, schedule='static', nogil=True, num_threads=n_threads
            ):
                dbl2 = {{cgrad_hess}}(y_true[i], raw_prediction[i]{{with_param}})
                gradient_out[i] = sample_weight[i] * dbl2.val1
                hessian_out[i] = sample_weight[i] * dbl2.val2

{{endfor}}
|
| 1141 |
+
|
| 1142 |
+
|
| 1143 |
+
# The multinomial deviance loss is also known as categorical cross-entropy or
|
| 1144 |
+
# multinomial log-likelihood.
|
| 1145 |
+
# Here, we do not inherit from CyLossFunction as its cy_gradient method deviates
|
| 1146 |
+
# from the API.
|
| 1147 |
+
cdef class CyHalfMultinomialLoss():
    """Half Multinomial deviance loss with multinomial logit link.

    Domain:
    y_true in {0, 1, 2, 3, .., n_classes - 1}
    y_pred in (0, 1)**n_classes, i.e. interval with boundaries excluded

    Link:
    y_pred = softmax(raw_prediction)

    Note: Label encoding is built-in, i.e. {0, 1, 2, 3, .., n_classes - 1} is
    mapped to (y_true == k) for k = 0 .. n_classes - 1 which is either 0 or 1.
    """

    # Here we deviate from the CyLossFunction API. SAG/SAGA needs direct access to
    # sample-wise gradients which we provide here.
    cdef inline void cy_gradient(
        self,
        const floating_in y_true,
        const floating_in[::1] raw_prediction,  # IN
        const floating_in sample_weight,
        floating_out[::1] gradient_out,  # OUT
    ) noexcept nogil:
        """Compute gradient of loss w.r.t. `raw_prediction` for a single sample.

        The gradient of the multinomial logistic loss with respect to a class k,
        and for one sample is:

            grad_k = sw * (p[k] - (y==k))

        where:

            p[k] = proba[k] = exp(raw_prediction[k] - logsumexp(raw_prediction))
            sw = sample_weight

        Parameters
        ----------
        y_true : double
            Observed, true target value.
        raw_prediction : array of shape (n_classes,)
            Raw prediction values (in link space).
        sample_weight : double
            Sample weight.
        gradient_out : array of shape (n_classes,)
            A location into which the gradient is stored.

        Notes
        -----
        Returns nothing; the derivative of the loss function w.r.t.
        `raw_prediction` is written into `gradient_out` in place.
        """
        cdef:
            int k
            int n_classes = raw_prediction.shape[0]
            double_pair max_value_and_sum_exps
            # View the 1d prediction as a single-row 2d array so that the
            # shared helper sum_exp_minus_max (which indexes rows) can be used.
            const floating_in[:, :] raw = raw_prediction[None, :]

        # sum_exp_minus_max writes the (numerically stabilized) exponentials
        # into gradient_out and returns (max value, sum of exps).
        max_value_and_sum_exps = sum_exp_minus_max(0, raw, &gradient_out[0])
        for k in range(n_classes):
            # gradient_out[k] = p_k = y_pred_k = prob of class k
            gradient_out[k] /= max_value_and_sum_exps.val2
            # gradient_k = (p_k - (y_true == k)) * sw
            gradient_out[k] = (gradient_out[k] - (y_true == k)) * sample_weight

    def _test_cy_gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[:, ::1] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
    ):
        """For testing only.

        Python-accessible wrapper that applies cy_gradient sample by sample and
        returns the (n_samples, n_classes) gradient array.
        """
        cdef:
            int i, k
            int n_samples = y_true.shape[0]
            int n_classes = raw_prediction.shape[1]
            floating_in [:, ::1] gradient_out
        # NOTE(review): buffer dtype is fixed to float64; this presumably
        # assumes floating_in == double here — confirm against callers.
        gradient = np.empty((n_samples, n_classes), dtype=np.float64)
        gradient_out = gradient

        for i in range(n_samples):
            self.cy_gradient(
                y_true=y_true[i],
                raw_prediction=raw_prediction[i, :],
                sample_weight=1.0 if sample_weight is None else sample_weight[i],
                gradient_out=gradient_out[i, :],
            )
        return gradient

    # Note that we do not assume memory alignment/contiguity of 2d arrays.
    # There seems to be little benefit in doing so. Benchmarks proving the
    # opposite are welcome.
    def loss(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[:, :] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] loss_out,             # OUT
        int n_threads=1
    ):
        """Compute the pointwise loss value for each input, written to loss_out."""
        cdef:
            int i, k
            int n_samples = y_true.shape[0]
            int n_classes = raw_prediction.shape[1]
            floating_in max_value, sum_exps
            floating_in*  p  # temporary buffer
            double_pair max_value_and_sum_exps

        # We assume n_samples > n_classes. In this case having the inner loop
        # over n_classes is a good default.
        # TODO: If every memoryview is contiguous and raw_prediction is
        #       f-contiguous, can we write a better algo (loops) to improve
        #       performance?
        if sample_weight is None:
            # inner loop over n_classes
            with nogil, parallel(num_threads=n_threads):
                # Define private buffer variables as each thread might use its
                # own.
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    max_value = max_value_and_sum_exps.val1
                    sum_exps = max_value_and_sum_exps.val2
                    # logsumexp in its numerically stable form.
                    loss_out[i] = log(sum_exps) + max_value

                    # label encoded y_true
                    k = int(y_true[i])
                    loss_out[i] -= raw_prediction[i, k]

                free(p)
        else:
            with nogil, parallel(num_threads=n_threads):
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    max_value = max_value_and_sum_exps.val1
                    sum_exps = max_value_and_sum_exps.val2
                    loss_out[i] = log(sum_exps) + max_value

                    # label encoded y_true
                    k = int(y_true[i])
                    loss_out[i] -= raw_prediction[i, k]

                    loss_out[i] *= sample_weight[i]

                free(p)

    def loss_gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[:, :] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[::1] loss_out,             # OUT
        floating_out[:, :] gradient_out,        # OUT
        int n_threads=1
    ):
        """Compute loss and gradient in a single fused pass over the samples."""
        cdef:
            int i, k
            int n_samples = y_true.shape[0]
            int n_classes = raw_prediction.shape[1]
            floating_in max_value, sum_exps
            floating_in*  p  # temporary buffer
            double_pair max_value_and_sum_exps

        if sample_weight is None:
            # inner loop over n_classes
            with nogil, parallel(num_threads=n_threads):
                # Define private buffer variables as each thread might use its
                # own.
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    max_value = max_value_and_sum_exps.val1
                    sum_exps = max_value_and_sum_exps.val2
                    loss_out[i] = log(sum_exps) + max_value

                    for k in range(n_classes):
                        # label decode y_true
                        if y_true[i] == k:
                            loss_out[i] -= raw_prediction[i, k]
                        p[k] /= sum_exps  # p_k = y_pred_k = prob of class k
                        # gradient_k = p_k - (y_true == k)
                        gradient_out[i, k] = p[k] - (y_true[i] == k)

                free(p)
        else:
            with nogil, parallel(num_threads=n_threads):
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    max_value = max_value_and_sum_exps.val1
                    sum_exps = max_value_and_sum_exps.val2
                    loss_out[i] = log(sum_exps) + max_value

                    for k in range(n_classes):
                        # label decode y_true
                        if y_true[i] == k:
                            loss_out[i] -= raw_prediction[i, k]
                        p[k] /= sum_exps  # p_k = y_pred_k = prob of class k
                        # gradient_k = (p_k - (y_true == k)) * sw
                        gradient_out[i, k] = (p[k] - (y_true[i] == k)) * sample_weight[i]

                    loss_out[i] *= sample_weight[i]

                free(p)

    def gradient(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[:, :] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[:, :] gradient_out,        # OUT
        int n_threads=1
    ):
        """Compute the pointwise gradient, written into gradient_out."""
        cdef:
            int i, k
            int n_samples = y_true.shape[0]
            int n_classes = raw_prediction.shape[1]
            floating_in sum_exps
            floating_in*  p  # temporary buffer
            double_pair max_value_and_sum_exps

        if sample_weight is None:
            # inner loop over n_classes
            with nogil, parallel(num_threads=n_threads):
                # Define private buffer variables as each thread might use its
                # own.
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    sum_exps = max_value_and_sum_exps.val2

                    for k in range(n_classes):
                        p[k] /= sum_exps  # p_k = y_pred_k = prob of class k
                        # gradient_k = y_pred_k - (y_true == k)
                        gradient_out[i, k] = p[k] - (y_true[i] == k)

                free(p)
        else:
            with nogil, parallel(num_threads=n_threads):
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    sum_exps = max_value_and_sum_exps.val2

                    for k in range(n_classes):
                        p[k] /= sum_exps  # p_k = y_pred_k = prob of class k
                        # gradient_k = (p_k - (y_true == k)) * sw
                        gradient_out[i, k] = (p[k] - (y_true[i] == k)) * sample_weight[i]

                free(p)

    def gradient_hessian(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[:, :] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[:, :] gradient_out,        # OUT
        floating_out[:, :] hessian_out,         # OUT
        int n_threads=1
    ):
        """Compute gradient and the diagonal (per class) of the hessian."""
        cdef:
            int i, k
            int n_samples = y_true.shape[0]
            int n_classes = raw_prediction.shape[1]
            floating_in sum_exps
            floating_in*  p  # temporary buffer
            double_pair max_value_and_sum_exps

        if sample_weight is None:
            # inner loop over n_classes
            with nogil, parallel(num_threads=n_threads):
                # Define private buffer variables as each thread might use its
                # own.
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    sum_exps = max_value_and_sum_exps.val2

                    for k in range(n_classes):
                        p[k] /= sum_exps  # p_k = y_pred_k = prob of class k
                        # hessian_k = p_k * (1 - p_k)
                        # gradient_k = p_k - (y_true == k)
                        gradient_out[i, k] = p[k] - (y_true[i] == k)
                        hessian_out[i, k] = p[k] * (1. - p[k])

                free(p)
        else:
            with nogil, parallel(num_threads=n_threads):
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    sum_exps = max_value_and_sum_exps.val2

                    for k in range(n_classes):
                        p[k] /= sum_exps  # p_k = y_pred_k = prob of class k
                        # gradient_k = (p_k - (y_true == k)) * sw
                        # hessian_k = p_k * (1 - p_k) * sw
                        gradient_out[i, k] = (p[k] - (y_true[i] == k)) * sample_weight[i]
                        hessian_out[i, k] = (p[k] * (1. - p[k])) * sample_weight[i]

                free(p)

    # This method simplifies the implementation of hessp in linear models,
    # i.e. the matrix-vector product of the full hessian, not only of the
    # diagonal (in the classes) approximation as implemented above.
    def gradient_proba(
        self,
        const floating_in[::1] y_true,          # IN
        const floating_in[:, :] raw_prediction,  # IN
        const floating_in[::1] sample_weight,   # IN
        floating_out[:, :] gradient_out,        # OUT
        floating_out[:, :] proba_out,           # OUT
        int n_threads=1
    ):
        """Compute gradient and class probabilities, written into the OUT arrays."""
        cdef:
            int i, k
            int n_samples = y_true.shape[0]
            int n_classes = raw_prediction.shape[1]
            floating_in sum_exps
            floating_in*  p  # temporary buffer
            double_pair max_value_and_sum_exps

        if sample_weight is None:
            # inner loop over n_classes
            with nogil, parallel(num_threads=n_threads):
                # Define private buffer variables as each thread might use its
                # own.
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    sum_exps = max_value_and_sum_exps.val2

                    for k in range(n_classes):
                        proba_out[i, k] = p[k] / sum_exps  # y_pred_k = prob of class k
                        # gradient_k = y_pred_k - (y_true == k)
                        gradient_out[i, k] = proba_out[i, k] - (y_true[i] == k)

                free(p)
        else:
            with nogil, parallel(num_threads=n_threads):
                p = <floating_in *> malloc(sizeof(floating_in) * (n_classes))

                for i in prange(n_samples, schedule='static'):
                    max_value_and_sum_exps = sum_exp_minus_max(i, raw_prediction, p)
                    sum_exps = max_value_and_sum_exps.val2

                    for k in range(n_classes):
                        proba_out[i, k] = p[k] / sum_exps  # y_pred_k = prob of class k
                        # gradient_k = (p_k - (y_true == k)) * sw
                        gradient_out[i, k] = (proba_out[i, k] - (y_true[i] == k)) * sample_weight[i]

                free(p)
|
.venv/lib/python3.12/site-packages/sklearn/_loss/link.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Module contains classes for invertible (and differentiable) link functions.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# Authors: The scikit-learn developers
|
| 6 |
+
# SPDX-License-Identifier: BSD-3-Clause
|
| 7 |
+
|
| 8 |
+
from abc import ABC, abstractmethod
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from scipy.special import expit, logit
|
| 13 |
+
from scipy.stats import gmean
|
| 14 |
+
|
| 15 |
+
from ..utils.extmath import softmax
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class Interval:
    # Interval on the real line; each bound may individually be open or closed.
    low: float
    high: float
    low_inclusive: bool
    high_inclusive: bool

    def __post_init__(self):
        """Check that low <= high"""
        if self.low > self.high:
            raise ValueError(
                f"One must have low <= high; got low={self.low}, high={self.high}."
            )

    def includes(self, x):
        """Test whether all values of x are in interval range.

        Parameters
        ----------
        x : ndarray
            Array whose elements are tested to be in interval range.

        Returns
        -------
        result : bool
        """
        lower_ok = (
            np.greater_equal(x, self.low)
            if self.low_inclusive
            else np.greater(x, self.low)
        )
        if not np.all(lower_ok):
            return False

        upper_ok = (
            np.less_equal(x, self.high)
            if self.high_inclusive
            else np.less(x, self.high)
        )
        # np.all returns numpy.bool_; coerce it to a builtin bool.
        return bool(np.all(upper_ok))
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _inclusive_low_high(interval, dtype=np.float64):
|
| 62 |
+
"""Generate values low and high to be within the interval range.
|
| 63 |
+
|
| 64 |
+
This is used in tests only.
|
| 65 |
+
|
| 66 |
+
Returns
|
| 67 |
+
-------
|
| 68 |
+
low, high : tuple
|
| 69 |
+
The returned values low and high lie within the interval.
|
| 70 |
+
"""
|
| 71 |
+
eps = 10 * np.finfo(dtype).eps
|
| 72 |
+
if interval.low == -np.inf:
|
| 73 |
+
low = -1e10
|
| 74 |
+
elif interval.low < 0:
|
| 75 |
+
low = interval.low * (1 - eps) + eps
|
| 76 |
+
else:
|
| 77 |
+
low = interval.low * (1 + eps) + eps
|
| 78 |
+
|
| 79 |
+
if interval.high == np.inf:
|
| 80 |
+
high = 1e10
|
| 81 |
+
elif interval.high < 0:
|
| 82 |
+
high = interval.high * (1 + eps) - eps
|
| 83 |
+
else:
|
| 84 |
+
high = interval.high * (1 - eps) - eps
|
| 85 |
+
|
| 86 |
+
return low, high
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
class BaseLink(ABC):
    """Abstract base class for differentiable, invertible link functions.

    Convention:
        - link function g: raw_prediction = g(y_pred)
        - inverse link h: y_pred = h(raw_prediction)

    For (generalized) linear models, `raw_prediction = X @ coef` is the so
    called linear predictor, and `y_pred = h(raw_prediction)` is the predicted
    conditional (on X) expected value of the target `y_true`.

    The methods are not implemented as staticmethods in case a link function needs
    parameters.
    """

    is_multiclass = False  # used for testing only

    # Usually, raw_prediction may be any real number and y_pred is an open
    # interval.
    # interval_raw_prediction = Interval(-np.inf, np.inf, False, False)
    interval_y_pred = Interval(-np.inf, np.inf, False, False)

    @abstractmethod
    def link(self, y_pred, out=None):
        """Compute the link function g(y_pred).

        The link function maps (predicted) target values to raw predictions,
        i.e. `g(y_pred) = raw_prediction`.

        Parameters
        ----------
        y_pred : array
            Predicted target values.
        out : array
            A location into which the result is stored. If provided, it must
            have a shape that the inputs broadcast to. If not provided or None,
            a freshly-allocated array is returned.

        Returns
        -------
        out : array
            Output array, element-wise link function.
        """

    @abstractmethod
    def inverse(self, raw_prediction, out=None):
        """Compute the inverse link function h(raw_prediction).

        The inverse link function maps raw predictions to predicted target
        values, i.e. `h(raw_prediction) = y_pred`.

        Parameters
        ----------
        raw_prediction : array
            Raw prediction values (in link space).
        out : array
            A location into which the result is stored. If provided, it must
            have a shape that the inputs broadcast to. If not provided or None,
            a freshly-allocated array is returned.

        Returns
        -------
        out : array
            Output array, element-wise inverse link function.
        """
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
class IdentityLink(BaseLink):
    """The identity link function g(x)=x."""

    def link(self, y_pred, out=None):
        # Nothing to transform; only honor the optional output buffer.
        if out is None:
            return y_pred
        np.copyto(out, y_pred)
        return out

    # The identity is its own inverse.
    inverse = link
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
class LogLink(BaseLink):
    """The log link function g(x)=log(x)."""

    # The log is only defined for strictly positive predictions.
    interval_y_pred = Interval(0, np.inf, False, False)

    def link(self, y_pred, out=None):
        # g(y_pred) = log(y_pred)
        result = np.log(y_pred, out=out)
        return result

    def inverse(self, raw_prediction, out=None):
        # h(raw_prediction) = exp(raw_prediction)
        result = np.exp(raw_prediction, out=out)
        return result
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
class LogitLink(BaseLink):
    """The logit link function g(x)=logit(x)."""

    # Probabilities strictly inside (0, 1); boundaries map to +-inf.
    interval_y_pred = Interval(0, 1, False, False)

    def link(self, y_pred, out=None):
        # g(y_pred) = log(y_pred / (1 - y_pred))
        result = logit(y_pred, out=out)
        return result

    def inverse(self, raw_prediction, out=None):
        # h(raw_prediction) = sigmoid(raw_prediction)
        result = expit(raw_prediction, out=out)
        return result
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
class HalfLogitLink(BaseLink):
    """Half the logit link function g(x)=1/2 * logit(x).

    Used for the exponential loss.
    """

    interval_y_pred = Interval(0, 1, False, False)

    def link(self, y_pred, out=None):
        # Compute logit (in place when out is given), then halve it.
        result = logit(y_pred, out=out)
        result *= 0.5
        return result

    def inverse(self, raw_prediction, out=None):
        # h(raw_prediction) = sigmoid(2 * raw_prediction)
        doubled = 2 * raw_prediction
        return expit(doubled, out)
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
class MultinomialLogit(BaseLink):
    """The symmetric multinomial logit function.

    Convention:
        - y_pred.shape = raw_prediction.shape = (n_samples, n_classes)

    Notes:
        - The inverse link h is the softmax function.
        - The sum is over the second axis, i.e. axis=1 (n_classes).

    To make

        y_pred[k] = exp(raw_pred[k]) / sum(exp(raw_pred[k]), k=0..n_classes-1)

    identifiable and invertible for n_classes classes, an additional
    constraint has to be chosen. We use the symmetric side constraint with the
    geometric mean response as reference category, see [2]:

        raw_prediction[k] = g(y_pred[k]) = log(y_pred[k]/gmean(y_pred))
                          = log(y_pred[k]) - mean(log(y_pred)).

    This is equivalent to the definition in [1] and implies mean centered raw
    predictions:

        sum(raw_prediction[k], k=0..n_classes-1) = 0.

    For linear models with raw_prediction = X @ coef, this corresponds to
    sum(coef[k], k=0..n_classes-1) = 0, i.e. the sum over classes for every
    feature is zero.

    Reference
    ---------
    .. [1] Friedman, Jerome; Hastie, Trevor; Tibshirani, Robert. "Additive
        logistic regression: a statistical view of boosting" Ann. Statist.
        28 (2000), no. 2, 337--407. doi:10.1214/aos/1016218223.
        https://projecteuclid.org/euclid.aos/1016218223

    .. [2] Zahid, Faisal Maqbool and Gerhard Tutz. "Ridge estimation for
        multinomial logit models with symmetric side constraints."
        Computational Statistics 28 (2013): 1017-1034.
        http://epub.ub.uni-muenchen.de/11001/1/tr067.pdf
    """

    is_multiclass = True
    interval_y_pred = Interval(0, 1, False, False)

    def symmetrize_raw_prediction(self, raw_prediction):
        # Center each row so that the per-sample class sum is zero.
        row_means = np.mean(raw_prediction, axis=1)
        return raw_prediction - row_means[:, np.newaxis]

    def link(self, y_pred, out=None):
        # The per-row geometric mean serves as the reference category.
        reference = gmean(y_pred, axis=1)
        return np.log(y_pred / reference[:, np.newaxis], out=out)

    def inverse(self, raw_prediction, out=None):
        if out is None:
            return softmax(raw_prediction, copy=True)
        # Reuse the caller's buffer: copy the input, then softmax in place.
        np.copyto(out, raw_prediction)
        softmax(out, copy=False)
        return out
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# Registry mapping the user-facing link name to its implementing class.
_LINKS = {
    "identity": IdentityLink,
    "log": LogLink,
    "logit": LogitLink,
    "half_logit": HalfLogitLink,
    "multinomial_logit": MultinomialLogit,
}
|
.venv/lib/python3.12/site-packages/sklearn/_loss/loss.py
ADDED
|
@@ -0,0 +1,1181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module contains loss classes suitable for fitting.
|
| 3 |
+
|
| 4 |
+
It is not part of the public API.
|
| 5 |
+
Specific losses are used for regression, binary classification or multiclass
|
| 6 |
+
classification.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
# Authors: The scikit-learn developers
|
| 10 |
+
# SPDX-License-Identifier: BSD-3-Clause
|
| 11 |
+
|
| 12 |
+
# Goals:
|
| 13 |
+
# - Provide a common private module for loss functions/classes.
|
| 14 |
+
# - To be used in:
|
| 15 |
+
# - LogisticRegression
|
| 16 |
+
# - PoissonRegressor, GammaRegressor, TweedieRegressor
|
| 17 |
+
# - HistGradientBoostingRegressor, HistGradientBoostingClassifier
|
| 18 |
+
# - GradientBoostingRegressor, GradientBoostingClassifier
|
| 19 |
+
# - SGDRegressor, SGDClassifier
|
| 20 |
+
# - Replace link module of GLMs.
|
| 21 |
+
|
| 22 |
+
import numbers
|
| 23 |
+
|
| 24 |
+
import numpy as np
|
| 25 |
+
from scipy.special import xlogy
|
| 26 |
+
|
| 27 |
+
from ..utils import check_scalar
|
| 28 |
+
from ..utils.stats import _weighted_percentile
|
| 29 |
+
from ._loss import (
|
| 30 |
+
CyAbsoluteError,
|
| 31 |
+
CyExponentialLoss,
|
| 32 |
+
CyHalfBinomialLoss,
|
| 33 |
+
CyHalfGammaLoss,
|
| 34 |
+
CyHalfMultinomialLoss,
|
| 35 |
+
CyHalfPoissonLoss,
|
| 36 |
+
CyHalfSquaredError,
|
| 37 |
+
CyHalfTweedieLoss,
|
| 38 |
+
CyHalfTweedieLossIdentity,
|
| 39 |
+
CyHuberLoss,
|
| 40 |
+
CyPinballLoss,
|
| 41 |
+
)
|
| 42 |
+
from .link import (
|
| 43 |
+
HalfLogitLink,
|
| 44 |
+
IdentityLink,
|
| 45 |
+
Interval,
|
| 46 |
+
LogitLink,
|
| 47 |
+
LogLink,
|
| 48 |
+
MultinomialLogit,
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Note: The shape of raw_prediction for multiclass classifications are
|
| 53 |
+
# - GradientBoostingClassifier: (n_samples, n_classes)
|
| 54 |
+
# - HistGradientBoostingClassifier: (n_classes, n_samples)
|
| 55 |
+
#
|
| 56 |
+
# Note: Instead of inheritance like
|
| 57 |
+
#
|
| 58 |
+
# class BaseLoss(BaseLink, CyLossFunction):
|
| 59 |
+
# ...
|
| 60 |
+
#
|
| 61 |
+
# # Note: Naturally, we would inherit in the following order
|
| 62 |
+
# # class HalfSquaredError(IdentityLink, CyHalfSquaredError, BaseLoss)
|
| 63 |
+
# # But because of https://github.com/cython/cython/issues/4350 we set BaseLoss as
|
| 64 |
+
# # the last one. This, of course, changes the MRO.
|
| 65 |
+
# class HalfSquaredError(IdentityLink, CyHalfSquaredError, BaseLoss):
|
| 66 |
+
#
|
| 67 |
+
# we use composition. This way we improve maintainability by avoiding the above
|
| 68 |
+
# mentioned Cython edge case and have easier to understand code (which method calls
|
| 69 |
+
# which code).
|
| 70 |
+
class BaseLoss:
    """Base class for a loss function of 1-dimensional targets.

    Conventions:

        - y_true.shape = sample_weight.shape = (n_samples,)
        - y_pred.shape = raw_prediction.shape = (n_samples,)
        - If is_multiclass is true (multiclass classification), then
          y_pred.shape = raw_prediction.shape = (n_samples, n_classes)
          Note that this corresponds to the return value of decision_function.

    y_true, y_pred, sample_weight and raw_prediction must either be all float64
    or all float32.
    gradient and hessian must be either both float64 or both float32.

    Note that y_pred = link.inverse(raw_prediction).

    Specific loss classes can inherit specific link classes to satisfy
    BaseLink's abstractmethods.

    Parameters
    ----------
    sample_weight : {None, ndarray}
        If sample_weight is None, the hessian might be constant.
    n_classes : {None, int}
        The number of classes for classification, else None.

    Attributes
    ----------
    closs: CyLossFunction
    link : BaseLink
    interval_y_true : Interval
        Valid interval for y_true
    interval_y_pred : Interval
        Valid Interval for y_pred
    differentiable : bool
        Indicates whether or not loss function is differentiable in
        raw_prediction everywhere.
    need_update_leaves_values : bool
        Indicates whether decision trees in gradient boosting need to update
        leaf values after having been fit to the (negative) gradients.
    approx_hessian : bool
        Indicates whether the hessian is approximated or exact. If,
        approximated, it should be larger or equal to the exact one.
    constant_hessian : bool
        Indicates whether the hessian is one for this loss.
    is_multiclass : bool
        Indicates whether n_classes > 2 is allowed.
    """

    # For gradient boosted decision trees:
    # This variable indicates whether the loss requires the leaves values to
    # be updated once the tree has been trained. The trees are trained to
    # predict a Newton-Raphson step (see grower._finalize_leaf()). But for
    # some losses (e.g. least absolute deviation) we need to adjust the tree
    # values to account for the "line search" of the gradient descent
    # procedure. See the original paper Greedy Function Approximation: A
    # Gradient Boosting Machine by Friedman
    # (https://statweb.stanford.edu/~jhf/ftp/trebst.pdf) for the theory.
    differentiable = True
    need_update_leaves_values = False
    is_multiclass = False

    def __init__(self, closs, link, n_classes=None):
        self.closs = closs
        self.link = link
        self.approx_hessian = False
        self.constant_hessian = False
        self.n_classes = n_classes
        # By default, y_true may be any real number; subclasses narrow this.
        self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        self.interval_y_pred = self.link.interval_y_pred

    def in_y_true_range(self, y):
        """Return True if y is in the valid range of y_true.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_true.includes(y)

    def in_y_pred_range(self, y):
        """Return True if y is in the valid range of y_pred.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_pred.includes(y)

    def loss(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        loss_out=None,
        n_threads=1,
    ):
        """Compute the pointwise loss value for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.
        """
        if loss_out is None:
            loss_out = np.empty_like(y_true)
        # Be graceful to shape (n_samples, 1) -> (n_samples,)
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)

        # Delegate the actual computation to the Cython implementation.
        self.closs.loss(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            loss_out=loss_out,
            n_threads=n_threads,
        )
        return loss_out

    def loss_gradient(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        loss_out=None,
        gradient_out=None,
        n_threads=1,
    ):
        """Compute loss and gradient w.r.t. raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the loss is stored. If None, a new array
            might be created.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.

        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        """
        # Allocate whichever output buffer is missing, matching the dtype of
        # the one that was supplied so both stay consistent.
        if loss_out is None:
            if gradient_out is None:
                loss_out = np.empty_like(y_true)
                gradient_out = np.empty_like(raw_prediction)
            else:
                loss_out = np.empty_like(y_true, dtype=gradient_out.dtype)
        elif gradient_out is None:
            gradient_out = np.empty_like(raw_prediction, dtype=loss_out.dtype)

        # Be graceful to shape (n_samples, 1) -> (n_samples,)
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)

        self.closs.loss_gradient(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            loss_out=loss_out,
            gradient_out=gradient_out,
            n_threads=n_threads,
        )
        return loss_out, gradient_out

    def gradient(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        n_threads=1,
    ):
        """Compute gradient of loss w.r.t raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        """
        if gradient_out is None:
            gradient_out = np.empty_like(raw_prediction)

        # Be graceful to shape (n_samples, 1) -> (n_samples,)
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)

        self.closs.gradient(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            n_threads=n_threads,
        )
        return gradient_out

    def gradient_hessian(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        hessian_out=None,
        n_threads=1,
    ):
        """Compute gradient and hessian of loss w.r.t raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        hessian_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the hessian is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.

        hessian : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise hessians.
        """
        # Allocate whichever output buffer is missing; when one is given,
        # the other is allocated with matching shape and dtype.
        if gradient_out is None:
            if hessian_out is None:
                gradient_out = np.empty_like(raw_prediction)
                hessian_out = np.empty_like(raw_prediction)
            else:
                gradient_out = np.empty_like(hessian_out)
        elif hessian_out is None:
            hessian_out = np.empty_like(gradient_out)

        # Be graceful to shape (n_samples, 1) -> (n_samples,)
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)
        if hessian_out.ndim == 2 and hessian_out.shape[1] == 1:
            hessian_out = hessian_out.squeeze(1)

        self.closs.gradient_hessian(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            hessian_out=hessian_out,
            n_threads=n_threads,
        )
        return gradient_out, hessian_out

    def __call__(self, y_true, raw_prediction, sample_weight=None, n_threads=1):
        """Compute the weighted average loss.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : float
            Mean or averaged loss function.
        """
        # The pointwise loss is computed unweighted; sample_weight enters
        # exactly once, as the weights of the average.
        return np.average(
            self.loss(
                y_true=y_true,
                raw_prediction=raw_prediction,
                sample_weight=None,
                loss_out=None,
                n_threads=n_threads,
            ),
            weights=sample_weight,
        )

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This can be used as initial estimates of predictions, i.e. before the
        first iteration in fit.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or array of shape (n_samples,)
            Sample weights.

        Returns
        -------
        raw_prediction : numpy scalar or array of shape (n_classes,)
            Raw predictions of an intercept-only model.
        """
        # As default, take weighted average of the target over the samples
        # axis=0 and then transform into link-scale (raw_prediction).
        y_pred = np.average(y_true, weights=sample_weight, axis=0)
        # Small margin used to keep a clipped y_pred strictly inside an open
        # interval boundary before applying the link.
        eps = 10 * np.finfo(y_pred.dtype).eps

        if self.interval_y_pred.low == -np.inf:
            a_min = None
        elif self.interval_y_pred.low_inclusive:
            a_min = self.interval_y_pred.low
        else:
            a_min = self.interval_y_pred.low + eps

        if self.interval_y_pred.high == np.inf:
            a_max = None
        elif self.interval_y_pred.high_inclusive:
            a_max = self.interval_y_pred.high
        else:
            a_max = self.interval_y_pred.high - eps

        if a_min is None and a_max is None:
            return self.link.link(y_pred)
        else:
            return self.link.link(np.clip(y_pred, a_min, a_max))

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Calculate term dropped in loss.

        With this term added, the loss of perfect predictions is zero.
        """
        return np.zeros_like(y_true)

    def init_gradient_and_hessian(self, n_samples, dtype=np.float64, order="F"):
        """Initialize arrays for gradients and hessians.

        Unless hessians are constant, arrays are initialized with undefined values.

        Parameters
        ----------
        n_samples : int
            The number of samples, usually passed to `fit()`.
        dtype : {np.float64, np.float32}, default=np.float64
            The dtype of the arrays gradient and hessian.
        order : {'C', 'F'}, default='F'
            Order of the arrays gradient and hessian. The default 'F' makes the arrays
            contiguous along samples.

        Returns
        -------
        gradient : C-contiguous array of shape (n_samples,) or array of shape \
            (n_samples, n_classes)
            Empty array (allocated but not initialized) to be used as argument
            gradient_out.
        hessian : C-contiguous array of shape (n_samples,), array of shape
            (n_samples, n_classes) or shape (1,)
            Empty (allocated but not initialized) array to be used as argument
            hessian_out.
            If constant_hessian is True (e.g. `HalfSquaredError`), the array is
            initialized to ``1``.
        """
        if dtype not in (np.float32, np.float64):
            raise ValueError(
                "Valid options for 'dtype' are np.float32 and np.float64. "
                f"Got dtype={dtype} instead."
            )

        if self.is_multiclass:
            shape = (n_samples, self.n_classes)
        else:
            shape = (n_samples,)
        gradient = np.empty(shape=shape, dtype=dtype, order=order)

        if self.constant_hessian:
            # If the hessians are constant, we consider them equal to 1.
            # - This is correct for HalfSquaredError
            # - For AbsoluteError, hessians are actually 0, but they are
            #   always ignored anyway.
            hessian = np.ones(shape=(1,), dtype=dtype)
        else:
            hessian = np.empty(shape=shape, dtype=dtype, order=order)

        return gradient, hessian
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
# Note: Naturally, we would inherit in the following order
|
| 516 |
+
# class HalfSquaredError(IdentityLink, CyHalfSquaredError, BaseLoss)
|
| 517 |
+
# But because of https://github.com/cython/cython/issues/4350 we
|
| 518 |
+
# set BaseLoss as the last one. This, of course, changes the MRO.
|
| 519 |
+
class HalfSquaredError(BaseLoss):
    """Half of the squared error, using the identity link (regression).

    Domain:
    y_true and y_pred may be any real number

    Link:
    y_pred = raw_prediction

    The per-sample loss is::

        loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2

    Multiplying by 0.5 makes the gradient simply the residual and the
    hessian exactly one (matching LightGBM's convention); the result is
    also half the deviance of the Normal distribution.
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfSquaredError(), link=IdentityLink())
        # Without sample weights, every hessian entry equals 1 and never
        # needs to be recomputed.
        has_weights = sample_weight is not None
        self.constant_hessian = not has_weights
|
| 540 |
+
|
| 541 |
+
|
| 542 |
+
class AbsoluteError(BaseLoss):
    """Absolute error, using the identity link (regression).

    Domain:
    y_true and y_pred may be any real number

    Link:
    y_pred = raw_prediction

    The per-sample loss is::

        loss(x_i) = |y_true_i - raw_prediction_i|

    The true hessian vanishes almost everywhere (and the loss is not
    differentiable at zero residual, hence differentiable = False), but
    optimization routines such as HGBT need a strictly positive hessian,
    so a value of 1 is used instead.
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyAbsoluteError(), link=IdentityLink())
        self.approx_hessian = True
        self.constant_hessian = sample_weight is None

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        Returns the (weighted) median of the target, taken over the
        samples, i.e. axis=0.
        """
        if sample_weight is not None:
            return _weighted_percentile(y_true, sample_weight, 50)
        return np.median(y_true, axis=0)
|
| 578 |
+
|
| 579 |
+
|
| 580 |
+
class PinballLoss(BaseLoss):
    """Pinball loss, also known as quantile loss, for regression.

    Domain:
    y_true and y_pred may be any real number
    quantile in (0, 1)

    Link:
    y_pred = raw_prediction

    The per-sample loss is::

        loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)

        rho_{quantile}(u) = u * (quantile - 1_{u<0})
                          = -u *(1 - quantile)  if u < 0
                             u * quantile       if u >= 0

    Note: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().

    The true hessian vanishes almost everywhere (hence differentiable =
    False), but optimization routines such as HGBT need a strictly
    positive hessian, so a value of 1 is used instead.

    Additional Attributes
    ---------------------
    quantile : float
        The quantile level to estimate; must lie strictly inside (0, 1).
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None, quantile=0.5):
        # Validate before constructing the Cython loss.
        check_scalar(
            quantile,
            "quantile",
            target_type=numbers.Real,
            min_val=0,
            max_val=1,
            include_boundaries="neither",
        )
        super().__init__(
            closs=CyPinballLoss(quantile=float(quantile)),
            link=IdentityLink(),
        )
        self.approx_hessian = True
        self.constant_hessian = sample_weight is None

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        Returns the (weighted) quantile of the target, taken over the
        samples, i.e. axis=0.
        """
        percentile_level = 100 * self.closs.quantile
        if sample_weight is not None:
            return _weighted_percentile(y_true, sample_weight, percentile_level)
        return np.percentile(y_true, percentile_level, axis=0)
|
| 641 |
+
|
| 642 |
+
|
| 643 |
+
class HuberLoss(BaseLoss):
    """Huber loss, using the identity link (regression).

    Domain:
    y_true and y_pred may be any real number
    quantile in (0, 1)

    Link:
    y_pred = raw_prediction

    The per-sample loss is::

        loss(x_i) = 1/2 * abserr**2            if abserr <= delta
                    delta * (abserr - delta/2) if abserr > delta

        abserr = |y_true_i - raw_prediction_i|
        delta  = quantile(abserr, self.quantile)

    Note: HuberLoss(quantile=1) equals HalfSquaredError and HuberLoss(quantile=0)
    equals delta * (AbsoluteError() - delta/2).

    Additional Attributes
    ---------------------
    quantile : float
        Quantile level defining the breaking point `delta` between the
        squared-error and absolute-error regimes. Must lie in (0, 1).

    Reference
    ---------
    .. [1] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient
      boosting machine <10.1214/aos/1013203451>`.
      Annals of Statistics, 29, 1189-1232.
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None, quantile=0.9, delta=0.5):
        check_scalar(
            quantile,
            "quantile",
            target_type=numbers.Real,
            min_val=0,
            max_val=1,
            include_boundaries="neither",
        )
        self.quantile = quantile  # This is better stored outside of Cython.
        super().__init__(
            closs=CyHuberLoss(delta=float(delta)),
            link=IdentityLink(),
        )
        self.approx_hessian = True
        self.constant_hessian = False

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        Implements the formula preceding algorithm 4 in Friedman (2001),
        applied here to y_true rather than to the residual
        y_true - raw_prediction; an estimator such as
        HistGradientBoostingRegressor may call it on the residual itself,
        e.g. fit_intercept_only(y_true - raw_prediction).
        """
        if sample_weight is not None:
            center = _weighted_percentile(y_true, sample_weight, 50)
        else:
            center = np.percentile(y_true, 50, axis=0)
        residual = y_true - center
        # Residuals clipped at +/- delta, keeping their sign.
        clipped = np.sign(residual) * np.minimum(self.closs.delta, np.abs(residual))
        return center + np.average(clipped, weights=sample_weight)
|
| 714 |
+
|
| 715 |
+
|
| 716 |
+
class HalfPoissonLoss(BaseLoss):
    """Half of the Poisson deviance, using the log link (regression).

    Domain:
    y_true in non-negative real numbers
    y_pred in positive real numbers

    Link:
    y_pred = exp(raw_prediction)

    The per-sample loss is::

        loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))
                    - y_true_i + exp(raw_prediction_i)

    Half the Poisson deviance equals the negative log-likelihood up to
    terms that do not involve raw_prediction, which simplifies the
    gradient computation. The constant term
    `y_true_i * log(y_true_i) - y_true_i` is likewise dropped; it can be
    recovered via constant_to_optimal_zero.
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfPoissonLoss(), link=LogLink())
        # y_true = 0 is admissible (inclusive lower bound); negatives are not.
        self.interval_y_true = Interval(0, np.inf, True, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Return the dropped constant term so perfect predictions give loss 0."""
        # xlogy handles y_true = 0 gracefully (0 * log(0) = 0).
        term = xlogy(y_true, y_true) - y_true
        if sample_weight is None:
            return term
        return term * sample_weight
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
class HalfGammaLoss(BaseLoss):
    """Half of the Gamma deviance, using the log link (regression).

    Domain:
    y_true and y_pred in positive real numbers

    Link:
    y_pred = exp(raw_prediction)

    The per-sample loss is::

        loss(x_i) = log(exp(raw_prediction_i)/y_true_i)
                    + y_true/exp(raw_prediction_i) - 1

    Half the Gamma deviance is proportional to the negative log-likelihood
    up to terms that do not involve raw_prediction, which simplifies the
    gradient computation. The constant term `-log(y_true_i) - 1` is
    likewise dropped; it can be recovered via constant_to_optimal_zero.
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfGammaLoss(), link=LogLink())
        # Strictly positive targets only (exclusive lower bound).
        self.interval_y_true = Interval(0, np.inf, False, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Return the dropped constant term so perfect predictions give loss 0."""
        term = -np.log(y_true) - 1
        if sample_weight is None:
            return term
        return term * sample_weight
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
class HalfTweedieLoss(BaseLoss):
    """Half of the Tweedie deviance, using the log link (regression).

    Domain:
    y_true in real numbers for power <= 0
    y_true in non-negative real numbers for 0 < power < 2
    y_true in positive real numbers for 2 <= power
    y_pred in positive real numbers
    power in real numbers

    Link:
    y_pred = exp(raw_prediction)

    With p = power, the per-sample loss is::

        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                    - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)
                    + exp(raw_prediction_i)**(2-p) / (2-p)

    Taking the limits p=0, 1, 2 recovers HalfSquaredError (with a log
    link), HalfPoissonLoss and HalfGammaLoss respectively.

    Constant terms are dropped, and the dropped constants differ for
    p=0, 1, 2, so the loss is not continuous in `power`.

    Although no Tweedie distribution exists for 0 < power < 1, the loss
    still yields a strictly consistent scoring function for the
    expectation.
    """

    def __init__(self, sample_weight=None, power=1.5):
        super().__init__(
            closs=CyHalfTweedieLoss(power=float(power)),
            link=LogLink(),
        )
        # The admissible y_true range depends on the Tweedie power.
        p = self.closs.power
        if p <= 0:
            low, low_inclusive = -np.inf, False
        elif p < 2:
            low, low_inclusive = 0, True
        else:
            low, low_inclusive = 0, False
        self.interval_y_true = Interval(low, np.inf, low_inclusive, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Return the dropped constant term so perfect predictions give loss 0."""
        # For the special powers 0, 1 and 2, delegate to the dedicated
        # losses whose dropped constants differ from the generic formula.
        delegates = {
            0: HalfSquaredError,
            1: HalfPoissonLoss,
            2: HalfGammaLoss,
        }
        p = self.closs.power
        loss_cls = delegates.get(p)
        if loss_cls is not None:
            return loss_cls().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        term = np.power(np.maximum(y_true, 0), 2 - p) / (1 - p) / (2 - p)
        if sample_weight is None:
            return term
        return term * sample_weight
|
| 841 |
+
|
| 842 |
+
|
| 843 |
+
class HalfTweedieLossIdentity(BaseLoss):
    """Half Tweedie deviance loss with identity link, for regression.

    Domain:
    y_true in real numbers for power <= 0
    y_true in non-negative real numbers for 0 < power < 2
    y_true in positive real numbers for 2 <= power
    y_pred in positive real numbers for power != 0
    y_pred in real numbers for power = 0
    power in real numbers

    Link:
    y_pred = raw_prediction

    For a single sample x_i, the half Tweedie deviance with p=power reads::

        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                    - y_true_i * raw_prediction_i**(1-p) / (1-p)
                    + raw_prediction_i**(2-p) / (2-p)

    Note that the minimum value of this loss is 0.

    Although no Tweedie distribution exists for 0 < power < 1, the loss is
    still a strictly consistent scoring function for the expectation.
    """

    def __init__(self, sample_weight=None, power=1.5):
        super().__init__(
            closs=CyHalfTweedieLossIdentity(power=float(power)),
            link=IdentityLink(),
        )
        p = self.closs.power
        # Valid y_true range depends on the Tweedie power.
        if p <= 0:
            self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        elif p < 2:
            self.interval_y_true = Interval(0, np.inf, True, False)
        else:
            self.interval_y_true = Interval(0, np.inf, False, False)

        # Only the (limiting) squared-error case p = 0 allows negative
        # predictions with the identity link.
        if p == 0:
            self.interval_y_pred = Interval(-np.inf, np.inf, False, False)
        else:
            self.interval_y_pred = Interval(0, np.inf, False, False)
|
| 887 |
+
|
| 888 |
+
|
| 889 |
+
class HalfBinomialLoss(BaseLoss):
    """Half Binomial deviance loss with logit link, for binary classification.

    This is also known as binary cross entropy, log-loss and logistic loss.

    Domain:
    y_true in [0, 1], i.e. regression on the unit interval
    y_pred in (0, 1), i.e. boundaries excluded

    Link:
    y_pred = expit(raw_prediction)

    For a single sample x_i, the half Binomial deviance is the negative
    log-likelihood of the Binomial/Bernoulli distribution::

        loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i

    See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,
    section 4.4.1 (about logistic regression).

    The formulation covers both classification, y = {0, 1}, and logistic
    regression, y = [0, 1]. Adding `constant_to_optimal_zero` to the loss
    yields half the Bernoulli/binomial deviance.

    More details: Inserting the predicted probability
    y_pred = expit(raw_prediction) into the loss gives the well known::

        loss(x_i) = - y_true_i * log(y_pred_i) - (1 - y_true_i) * log(1 - y_pred_i)
    """

    def __init__(self, sample_weight=None):
        super().__init__(
            closs=CyHalfBinomialLoss(),
            link=LogitLink(),
            n_classes=2,
        )
        self.interval_y_true = Interval(0, 1, True, True)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Return the per-sample shift that puts the loss minimum at zero."""
        # xlogy resolves 0 * log(0) to 0, so the entropy term vanishes for
        # hard labels y_true in {0, 1}.
        entropy = xlogy(y_true, y_true) + xlogy(1 - y_true, 1 - y_true)
        if sample_weight is None:
            return entropy
        return entropy * sample_weight

    def predict_proba(self, raw_prediction):
        """Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, 2)
            Element-wise class probabilities.
        """
        # Be graceful to shape (n_samples, 1) -> (n_samples,)
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        p_positive = self.link.inverse(raw_prediction)
        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
        proba[:, 1] = p_positive
        proba[:, 0] = 1 - p_positive
        return proba
|
| 956 |
+
|
| 957 |
+
|
| 958 |
+
class HalfMultinomialLoss(BaseLoss):
    """Categorical cross-entropy loss, for multiclass classification.

    Domain:
    y_true in {0, 1, 2, 3, .., n_classes - 1}
    y_pred has n_classes elements, each element in (0, 1)

    Link:
    y_pred = softmax(raw_prediction)

    Note: We assume y_true to be already label encoded. The inverse link is
    softmax. But the full link function is the symmetric multinomial logit
    function.

    For a single sample x_i, the categorical cross-entropy is the negative
    log-likelihood of the multinomial distribution; it generalizes the binary
    cross-entropy to more than 2 classes::

        loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))
                - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)

    See [1].

    Note that for the hessian, we calculate only the diagonal part in the
    classes: If the full hessian for classes k and l and sample i is H_i_k_l,
    we calculate H_i_k_k, i.e. k=l.

    Reference
    ---------
    .. [1] :arxiv:`Simon, Noah, J. Friedman and T. Hastie.
        "A Blockwise Descent Algorithm for Group-penalized Multiresponse and
        Multinomial Regression".
        <1311.6529>`
    """

    is_multiclass = True

    def __init__(self, sample_weight=None, n_classes=3):
        super().__init__(
            closs=CyHalfMultinomialLoss(),
            link=MultinomialLogit(),
            n_classes=n_classes,
        )
        self.interval_y_true = Interval(0, np.inf, True, False)
        self.interval_y_pred = Interval(0, 1, False, False)

    def in_y_true_range(self, y):
        """Return True if y is in the valid range of y_true.

        Parameters
        ----------
        y : ndarray
        """
        # Labels must lie in the valid interval and be whole numbers.
        if not self.interval_y_true.includes(y):
            return False
        return np.all(y.astype(int) == y)

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the softmax of the weighted average of the target, i.e. over
        the samples axis=0.
        """
        eps = np.finfo(y_true.dtype).eps
        class_freq = np.zeros(self.n_classes, dtype=y_true.dtype)
        for k in range(self.n_classes):
            # Clip to the open interval (0, 1) so the link stays finite.
            class_freq[k] = np.clip(
                np.average(y_true == k, weights=sample_weight, axis=0),
                eps,
                1 - eps,
            )
        return self.link.link(class_freq[None, :]).reshape(-1)

    def predict_proba(self, raw_prediction):
        """Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        """
        proba = self.link.inverse(raw_prediction)
        return proba

    def gradient_proba(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        proba_out=None,
        n_threads=1,
    ):
        """Compute gradient and class probabilities for raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or array of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        proba_out : None or array of shape (n_samples, n_classes)
            A location into which the class probabilities are stored. If None,
            a new array might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples, n_classes)
            Element-wise gradients.

        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        """
        # Allocate whichever output buffers the caller did not supply,
        # matching the shape/dtype of what is available.
        if gradient_out is None and proba_out is None:
            gradient_out = np.empty_like(raw_prediction)
            proba_out = np.empty_like(raw_prediction)
        elif gradient_out is None:
            gradient_out = np.empty_like(proba_out)
        elif proba_out is None:
            proba_out = np.empty_like(gradient_out)

        self.closs.gradient_proba(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            proba_out=proba_out,
            n_threads=n_threads,
        )
        return gradient_out, proba_out
|
| 1095 |
+
|
| 1096 |
+
|
| 1097 |
+
class ExponentialLoss(BaseLoss):
    """Exponential loss with (half) logit link, for binary classification.

    This is also known as boosting loss.

    Domain:
    y_true in [0, 1], i.e. regression on the unit interval
    y_pred in (0, 1), i.e. boundaries excluded

    Link:
    y_pred = expit(2 * raw_prediction)

    For a single sample x_i, the exponential loss is defined as::

        loss(x_i) = y_true_i * exp(-raw_pred_i)) + (1 - y_true_i) * exp(raw_pred_i)

    See:
    - J. Friedman, T. Hastie, R. Tibshirani.
      "Additive logistic regression: a statistical view of boosting (With discussion
      and a rejoinder by the authors)." Ann. Statist. 28 (2) 337 - 407, April 2000.
      https://doi.org/10.1214/aos/1016218223
    - A. Buja, W. Stuetzle, Y. Shen. (2005).
      "Loss Functions for Binary Class Probability Estimation and Classification:
      Structure and Applications."

    The formulation covers classification, y = {0, 1}, as well as
    "exponential logistic" regression, y = [0, 1].
    This is a proper scoring rule, but used without its canonical link.

    More details: Inserting the predicted probability
    y_pred = expit(2 * raw_prediction) into the loss gives::

        loss(x_i) = y_true_i * sqrt((1 - y_pred_i) / y_pred_i)
            + (1 - y_true_i) * sqrt(y_pred_i / (1 - y_pred_i))
    """

    def __init__(self, sample_weight=None):
        super().__init__(
            closs=CyExponentialLoss(),
            link=HalfLogitLink(),
            n_classes=2,
        )
        self.interval_y_true = Interval(0, 1, True, True)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Return the per-sample shift that puts the loss minimum at zero."""
        # Vanishes for hard labels y_true in {0, 1}.
        shift = -2 * np.sqrt(y_true * (1 - y_true))
        if sample_weight is None:
            return shift
        return shift * sample_weight

    def predict_proba(self, raw_prediction):
        """Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, 2)
            Element-wise class probabilities.
        """
        # Be graceful to shape (n_samples, 1) -> (n_samples,)
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        p_positive = self.link.inverse(raw_prediction)
        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
        proba[:, 1] = p_positive
        proba[:, 0] = 1 - p_positive
        return proba
|
| 1168 |
+
|
| 1169 |
+
|
| 1170 |
+
# Registry mapping public loss names to their implementing classes.
_LOSSES = dict(
    squared_error=HalfSquaredError,
    absolute_error=AbsoluteError,
    pinball_loss=PinballLoss,
    huber_loss=HuberLoss,
    poisson_loss=HalfPoissonLoss,
    gamma_loss=HalfGammaLoss,
    tweedie_loss=HalfTweedieLoss,
    binomial_loss=HalfBinomialLoss,
    multinomial_loss=HalfMultinomialLoss,
    exponential_loss=ExponentialLoss,
)
|
.venv/lib/python3.12/site-packages/sklearn/_loss/meson.build
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# .pyx is generated, so this is needed to make Cython compilation work
_loss_cython_tree = [
  fs.copyfile('_loss.pxd')
]

# Render the Tempita template into the actual Cython source.
_loss_pyx = custom_target(
  '_loss_pyx',
  output: '_loss.pyx',
  input: '_loss.pyx.tp',
  command: [tempita, '@INPUT@', '-o', '@OUTDIR@'],
  # TODO in principle this should go in py.extension_module below. This is a
  # temporary work-around for a dependency issue with .pyx.tp files. For more
  # details, see https://github.com/mesonbuild/meson/issues/13212
  depends: _loss_cython_tree,
)

# Build and install the compiled extension module sklearn/_loss/_loss.
py.extension_module(
  '_loss',
  cython_gen.process(_loss_pyx),
  dependencies: [openmp_dep],
  install: true,
  subdir: 'sklearn/_loss',
)
|