diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_sycl.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_sycl.h
new file mode 100644
index 0000000000000000000000000000000000000000..829a6424550b3f44d56f1b09f316d7abce3961c8
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_sycl.h
@@ -0,0 +1,99 @@
+/*******************************************************************************
+* Copyright 2020-2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_GRAPH_SYCL_H
+#define ONEAPI_DNNL_DNNL_GRAPH_SYCL_H
+
+#include "oneapi/dnnl/dnnl_graph.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_graph_api
+/// @{
+
+/// @addtogroup dnnl_graph_api_interop
+/// @{
+
+/// @addtogroup dnnl_graph_api_sycl_interop
+/// @{
+
+/// Allocation call-back function interface for SYCL. A SYCL allocator should
+/// be used with the SYCL runtime, and a host allocator with a non-SYCL
+/// runtime. The call-back should return a USM device memory pointer.
+typedef void *(*dnnl_graph_sycl_allocate_f)(
+        size_t size, size_t alignment, const void *dev, const void *context);
+
+/// Deallocation call-back function interface for SYCL. A SYCL allocator
+/// should be used with the SYCL runtime, and a host allocator with a
+/// non-SYCL runtime. The call-back should deallocate the USM device memory
+/// returned by #dnnl_graph_sycl_allocate_f.
+typedef void (*dnnl_graph_sycl_deallocate_f)(
+        void *buf, const void *dev, const void *context, void *event);
+
+/// Creates an allocator with the given allocation and deallocation call-back
+/// function pointers.
+///
+/// @param allocator Output allocator.
+/// @param sycl_malloc A pointer to a SYCL malloc function.
+/// @param sycl_free A pointer to a SYCL free function.
+/// @returns #dnnl_success on success and a status describing the
+///     error otherwise.
+dnnl_status_t DNNL_API dnnl_graph_sycl_interop_allocator_create(
+        dnnl_graph_allocator_t *allocator,
+        dnnl_graph_sycl_allocate_f sycl_malloc,
+        dnnl_graph_sycl_deallocate_f sycl_free);
+
+/// Creates an engine associated with a SYCL device and a SYCL context with
+/// an allocator attached. This API supplements the existing oneDNN engine
+/// API.
+///
+/// @param engine Output engine.
+/// @param device Pointer to the SYCL device to use for the engine.
+/// @param context Pointer to the SYCL context to use for the engine.
+/// @param alloc The allocator to attach to the engine.
+/// @returns #dnnl_success on success and a status describing the
+///     error otherwise.
+dnnl_status_t DNNL_API dnnl_graph_sycl_interop_make_engine_with_allocator(
+        dnnl_engine_t *engine, const void *device, const void *context,
+        const_dnnl_graph_allocator_t alloc);
+
+/// Executes a compiled partition with the SYCL runtime.
+///
+/// @param compiled_partition The handle of the target compiled_partition.
+/// @param stream The stream used for execution.
+/// @param num_inputs The number of input tensors.
+/// @param inputs A list of input tensors.
+/// @param num_outputs The number of output tensors.
+/// @param outputs A non-empty list of output tensors.
+/// @param deps Optional handle of a list with `sycl::event` dependencies.
+/// @param sycl_event The handle of a SYCL event.
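Review note: the two call-back typedefs above only pin down signatures. A minimal sketch of matching USM-based implementations follows; the names `my_usm_allocate` and `my_usm_deallocate` are hypothetical, and it assumes the library passes the `sycl::device` and `sycl::context` through the opaque `dev`/`context` pointers (which is how the C++ wrapper later in this patch forwards them).

```cpp
#include <sycl/sycl.hpp>

// Hypothetical allocate call-back: cast the opaque pointers back to SYCL
// objects and return USM device memory, as the interface requires.
void *my_usm_allocate(
        size_t size, size_t alignment, const void *dev, const void *context) {
    return sycl::aligned_alloc_device(alignment, size,
            *static_cast<const sycl::device *>(dev),
            *static_cast<const sycl::context *>(context));
}

// Hypothetical deallocate call-back: wait on the event (when provided)
// before freeing the USM memory returned by the allocate call-back.
void my_usm_deallocate(
        void *buf, const void *dev, const void *context, void *event) {
    if (event) static_cast<sycl::event *>(event)->wait();
    sycl::free(buf, *static_cast<const sycl::context *>(context));
}
```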
+/// @returns #dnnl_success on success and a status describing the
+///     error otherwise.
+dnnl_status_t DNNL_API dnnl_graph_sycl_interop_compiled_partition_execute(
+        const_dnnl_graph_compiled_partition_t compiled_partition,
+        dnnl_stream_t stream, size_t num_inputs,
+        const_dnnl_graph_tensor_t *inputs, size_t num_outputs,
+        const_dnnl_graph_tensor_t *outputs, const void *deps,
+        void *sycl_event);
+
+/// @} dnnl_graph_api_sycl_interop
+
+/// @} dnnl_graph_api_interop
+
+/// @} dnnl_graph_api
+
+/// @} dnnl_api
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_sycl.hpp b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_sycl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..acb28b905319338bfb97d4fb39044161a827fbd6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_sycl.hpp
@@ -0,0 +1,131 @@
+/*******************************************************************************
+* Copyright 2020-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/// @file
+/// Graph SYCL interop API
+
+#ifndef ONEAPI_DNNL_DNNL_GRAPH_SYCL_HPP
+#define ONEAPI_DNNL_DNNL_GRAPH_SYCL_HPP
+
+/// @cond DO_NOT_DOCUMENT_THIS
+#include <vector>
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#error "Unsupported compiler"
+#endif
+
+#include "oneapi/dnnl/dnnl_graph.hpp"
+#include "oneapi/dnnl/dnnl_graph_sycl.h"
+/// @endcond
+
+/// @addtogroup dnnl_api
+/// @{
+
+namespace dnnl {
+
+/// @addtogroup dnnl_graph_api
+/// @{
+
+namespace graph {
+
+/// @addtogroup dnnl_graph_api_interop Runtime interoperability API
+/// API extensions to interact with the underlying run-time.
+/// @{
+
+/// @addtogroup dnnl_graph_api_sycl_interop SYCL interoperability API
+/// API extensions to interact with the underlying SYCL run-time.
+/// @{
+
+/// SYCL interoperability namespace
+namespace sycl_interop {
+
+/// Constructs an allocator from SYCL malloc and free function pointers. A
+/// SYCL allocator should be used with the SYCL runtime, and a host allocator
+/// with a non-SYCL runtime. Currently, only a device USM allocator is
+/// supported.
+///
+/// @param sycl_malloc The pointer to a SYCL malloc function.
+/// @param sycl_free The pointer to a SYCL free function.
+/// @returns Created allocator
+inline allocator make_allocator(dnnl_graph_sycl_allocate_f sycl_malloc,
+        dnnl_graph_sycl_deallocate_f sycl_free) {
+    dnnl_graph_allocator_t c_allocator = nullptr;
+    error::wrap_c_api(dnnl_graph_sycl_interop_allocator_create(
+                              &c_allocator, sycl_malloc, sycl_free),
+            "could not create allocator for sycl device");
+    return allocator(c_allocator);
+}
+
+/// Constructs an engine associated with a SYCL device and a SYCL context
+/// with an allocator attached.
+///
+/// @param adevice SYCL device.
+/// @param acontext SYCL context.
+/// @param alloc Allocator to attach to the engine.
+/// @returns Created engine.
+inline engine make_engine_with_allocator(const sycl::device &adevice,
+        const sycl::context &acontext, const allocator &alloc) {
+    dnnl_engine_t c_engine;
+    error::wrap_c_api(
+            dnnl_graph_sycl_interop_make_engine_with_allocator(&c_engine,
+                    static_cast<const void *>(&adevice),
+                    static_cast<const void *>(&acontext), alloc.get()),
+            "could not make an engine with allocator");
+    return engine(c_engine);
+}
+
+/// Executes a compiled partition in a specified stream and returns a SYCL
+/// event.
+///
+/// @param c_partition Compiled partition to execute.
+/// @param astream Stream object to run the execution over.
+/// @param inputs A list of input tensors.
+/// @param outputs A list of output tensors.
+/// @param deps Optional vector with `sycl::event` dependencies.
+/// @returns Output event.
+inline sycl::event execute(compiled_partition &c_partition, stream &astream,
+        const std::vector<tensor> &inputs, std::vector<tensor> &outputs,
+        const std::vector<sycl::event> &deps = {}) {
+    std::vector<const_dnnl_graph_tensor_t> c_inputs;
+    c_inputs.reserve(inputs.size());
+    for (auto &in : inputs) {
+        c_inputs.push_back(in.get());
+    }
+    std::vector<const_dnnl_graph_tensor_t> c_outputs;
+    c_outputs.reserve(outputs.size());
+    for (auto &out : outputs) {
+        c_outputs.push_back(out.get());
+    }
+
+    sycl::event sycl_event;
+    error::wrap_c_api(dnnl_graph_sycl_interop_compiled_partition_execute(
+                              c_partition.get(), astream.get(),
+                              c_inputs.size(), c_inputs.data(),
+                              c_outputs.size(), c_outputs.data(), &deps,
+                              &sycl_event),
+            "could not execute the compiled_partition on a specified sycl "
+            "stream");
+    return sycl_event;
+}
+
+} // namespace sycl_interop
+
+/// @} dnnl_graph_api_sycl_interop
+
+/// @} dnnl_graph_api_interop
+
+} // namespace graph
+
+/// @} dnnl_graph_api
+
+} // namespace dnnl
+
+/// @} dnnl_api
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_types.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..4193c7bdb517676571b8331f2e89c57ded6c5630
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_graph_types.h
@@ -0,0 +1,475 @@
+/*******************************************************************************
+ * Copyright 2020-2025 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
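Review note: taken together, the three sycl_interop helpers defined just above compose end to end roughly as follows. This is a sketch only: `cp`, `inputs`, and `outputs` are assumed to be a compiled partition and tensor vectors prepared elsewhere, and the call-backs are the hypothetical ones sketched earlier.

```cpp
namespace gsi = dnnl::graph::sycl_interop;

sycl::queue q {sycl::gpu_selector_v};

// Attach the USM call-backs to an allocator, then bind the allocator to an
// engine built on the queue's device and context.
dnnl::graph::allocator alloc
        = gsi::make_allocator(my_usm_allocate, my_usm_deallocate);
dnnl::engine eng = gsi::make_engine_with_allocator(
        q.get_device(), q.get_context(), alloc);
dnnl::stream strm = dnnl::sycl_interop::make_stream(eng, q);

// Execute the compiled partition and synchronize on the returned event.
sycl::event done = gsi::execute(cp, strm, inputs, outputs);
done.wait();
```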
+ *******************************************************************************/
+
+/// @file
+/// C API definitions
+
+#ifndef ONEAPI_DNNL_DNNL_GRAPH_TYPES_H
+#define ONEAPI_DNNL_DNNL_GRAPH_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @cond DO_NOT_DOCUMENT_THIS
+#include <stddef.h>
+#include <stdint.h>
+
+#include "oneapi/dnnl/dnnl_common_types.h"
+/// @endcond
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_graph_api
+/// @{
+
+/// @addtogroup dnnl_graph_api_logical_tensor
+/// @{
+
+/// A wildcard value for the number of dimensions which is unknown at a
+/// tensor or operation creation time.
+#define DNNL_GRAPH_UNKNOWN_NDIMS -1
+
+/// A wildcard value for dimensions that are unknown at a tensor or operation
+/// creation time.
+#define DNNL_GRAPH_UNKNOWN_DIM INT64_MIN
+
+/// Layout type specification
+typedef enum {
+    /// Undefined layout type
+    dnnl_graph_layout_type_undef = 0,
+    /// Any means to let the library decide the layout for a tensor during
+    /// partition compilation.
+    dnnl_graph_layout_type_any = 1,
+    /// Strided means that the layout of a tensor is determined by the
+    /// strides field in the logical tensor.
+    dnnl_graph_layout_type_strided = 2,
+    /// Opaque means that the layout of a tensor is library-specific.
+    /// Usually, an opaque layout is generated by a partition which is
+    /// compiled with layout type any.
+    dnnl_graph_layout_type_opaque = 3,
+} dnnl_graph_layout_type_t;
+
+/// Logical tensor property
+typedef enum {
+    /// Undefined tensor property
+    dnnl_graph_tensor_property_undef = 0,
+    /// Variable means the tensor may be changed during computation or
+    /// between different iterations.
+    dnnl_graph_tensor_property_variable = 1,
+    /// Constant means the tensor will remain unchanged during computation
+    /// and between different iterations. It's useful for the library to
+    /// apply optimizations for constant tensors or cache constant tensors
+    /// inside the library. For example, constant weight tensors in
+    /// inference scenarios.
+    dnnl_graph_tensor_property_constant = 2,
+} dnnl_graph_tensor_property_t;
+
+/// Logical tensor. It is based on an ID, a number of dimensions, the
+/// dimensions themselves, element data type, tensor property, and tensor
+/// memory layout.
+typedef struct {
+    /// Unique ID of each logical tensor. The library uses logical tensor
+    /// IDs to build up the connections between operations if the output of
+    /// one operation has the same ID as the input of another operation.
+    size_t id;
+
+    /// Number of dimensions. -1 means unknown (DNNL_GRAPH_UNKNOWN_NDIMS).
+    /// 0 is used to define a scalar tensor.
+    int ndims;
+
+    /// Size of each dimension. #DNNL_GRAPH_UNKNOWN_DIM means the size of
+    /// that dimension is unknown. 0 is used to define a zero-dimension
+    /// tensor. The library supports deducing output shapes from input
+    /// shapes during compilation. Unlike the memory descriptor in the
+    /// oneDNN primitive API, the order of dimensions is not defined in a
+    /// logical tensor. It is defined by the operations which respect the
+    /// order through the attributes #dnnl_graph_op_attr_data_format or
+    /// #dnnl_graph_op_attr_weights_format. For example, for a Convolution
+    /// with `data_format=NXC`, the first element of dims of the activation
+    /// tensor is the mini-batch size, the last effective element is the
+    /// channel size, and the elements between them are spatial dimensions.
+    dnnl_dims_t dims;
+
+    /// Data type of the tensor elements.
+    dnnl_data_type_t data_type;
+
+    /// Property type of the tensor.
+    dnnl_graph_tensor_property_t property;
+
+    /// Layout type of the tensor.
+    dnnl_graph_layout_type_t layout_type;
+    union {
+        /// The field is valid when `layout_type` is
+        /// #dnnl_graph_layout_type_strided. #DNNL_GRAPH_UNKNOWN_DIM means
+        /// the stride of the dimension is unknown. The library currently
+        /// doesn't support other negative stride values.
+        dnnl_dims_t strides;
+
+        /// The field is valid when `layout_type` is
+        /// #dnnl_graph_layout_type_opaque. An opaque layout ID is usually
+        /// generated by a partition which is compiled with layout type any.
+        size_t layout_id;
+    } layout;
+} dnnl_graph_logical_tensor_t;
+
+/// @} dnnl_graph_api_logical_tensor
+
+/// @addtogroup dnnl_graph_api_partition
+/// @{
+
+/// Policy specifications for partitioning
+typedef enum {
+    /// Fusion policy returns partitions with typical post-op fusions, e.g.,
+    /// Convolution + ReLU or other element-wise operations or a chain of
+    /// post-ops.
+    dnnl_graph_partition_policy_fusion = 1,
+    /// Debug policy does not apply any fusions. It returns partitions with
+    /// a single operation in each partition. The policy is useful when
+    /// users notice any bug or correctness issue in the fusion policy.
+    dnnl_graph_partition_policy_debug = 2,
+} dnnl_graph_partition_policy_t;
+
+/// An opaque structure to describe a partition.
+struct dnnl_graph_partition;
+
+/// A partition handle.
+typedef struct dnnl_graph_partition *dnnl_graph_partition_t;
+
+/// A constant partition handle.
+typedef const struct dnnl_graph_partition *const_dnnl_graph_partition_t;
+
+/// @} dnnl_graph_api_partition
+
+/// @addtogroup dnnl_graph_api_graph
+/// @{
+
+/// An opaque structure to describe a graph.
+struct dnnl_graph_graph;
+
+/// A graph handle.
+typedef struct dnnl_graph_graph *dnnl_graph_graph_t;
+
+/// A constant graph handle.
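Review note: as an illustration of the struct just defined, a plain strided f32 tensor of hypothetical shape {8, 256} could be described like this (values are illustrative, not from the source):

```cpp
// Sketch: a row-major strided f32 logical tensor with illustrative values.
dnnl_graph_logical_tensor_t lt = {};
lt.id = 0; // matching IDs connect op outputs to op inputs
lt.ndims = 2;
lt.dims[0] = 8; // mini-batch
lt.dims[1] = 256; // channels
lt.data_type = dnnl_f32;
lt.property = dnnl_graph_tensor_property_variable;
lt.layout_type = dnnl_graph_layout_type_strided;
lt.layout.strides[0] = 256; // row-major strides for an 8x256 tensor
lt.layout.strides[1] = 1;
```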
+typedef const struct dnnl_graph_graph *const_dnnl_graph_graph_t;
+
+/// @} dnnl_graph_api_graph
+
+/// @addtogroup dnnl_graph_api_op
+/// @{
+
+/// Kinds of operations
+typedef enum {
+    dnnl_graph_op_abs,
+    dnnl_graph_op_abs_backward,
+    dnnl_graph_op_add,
+    dnnl_graph_op_avg_pool,
+    dnnl_graph_op_avg_pool_backward,
+    dnnl_graph_op_batch_norm_backward,
+    dnnl_graph_op_batch_norm_forward_training,
+    dnnl_graph_op_batch_norm_inference,
+    dnnl_graph_op_bias_add,
+    dnnl_graph_op_bias_add_backward,
+    dnnl_graph_op_clamp,
+    dnnl_graph_op_clamp_backward,
+    dnnl_graph_op_concat,
+    dnnl_graph_op_convolution,
+    dnnl_graph_op_convolution_backward_data,
+    dnnl_graph_op_convolution_backward_weights,
+    dnnl_graph_op_conv_transpose,
+    dnnl_graph_op_conv_transpose_backward_data,
+    dnnl_graph_op_conv_transpose_backward_weights,
+    dnnl_graph_op_dequantize,
+    dnnl_graph_op_divide,
+    dnnl_graph_op_dynamic_dequantize,
+    dnnl_graph_op_dynamic_quantize,
+    dnnl_graph_op_elu,
+    dnnl_graph_op_elu_backward,
+    dnnl_graph_op_end,
+    dnnl_graph_op_exp,
+    dnnl_graph_op_gelu,
+    dnnl_graph_op_gelu_backward,
+    dnnl_graph_op_hard_swish,
+    dnnl_graph_op_hard_swish_backward,
+    dnnl_graph_op_interpolate,
+    dnnl_graph_op_interpolate_backward,
+    dnnl_graph_op_layer_norm,
+    dnnl_graph_op_layer_norm_backward,
+    dnnl_graph_op_leaky_relu,
+    dnnl_graph_op_log,
+    dnnl_graph_op_log_softmax,
+    dnnl_graph_op_log_softmax_backward,
+    dnnl_graph_op_matmul,
+    dnnl_graph_op_maximum,
+    dnnl_graph_op_max_pool,
+    dnnl_graph_op_max_pool_backward,
+    dnnl_graph_op_minimum,
+    dnnl_graph_op_mish,
+    dnnl_graph_op_mish_backward,
+    dnnl_graph_op_multiply,
+    dnnl_graph_op_prelu,
+    dnnl_graph_op_prelu_backward,
+    dnnl_graph_op_quantize,
+    dnnl_graph_op_reciprocal,
+    dnnl_graph_op_reduce_l1,
+    dnnl_graph_op_reduce_l2,
+    dnnl_graph_op_reduce_max,
+    dnnl_graph_op_reduce_mean,
+    dnnl_graph_op_reduce_min,
+    dnnl_graph_op_reduce_prod,
+    dnnl_graph_op_reduce_sum,
+    dnnl_graph_op_relu,
+    dnnl_graph_op_relu_backward,
+    dnnl_graph_op_reorder,
+    dnnl_graph_op_round,
+    dnnl_graph_op_sigmoid,
+    dnnl_graph_op_sigmoid_backward,
+    dnnl_graph_op_softmax,
+    dnnl_graph_op_softmax_backward,
+    dnnl_graph_op_softplus,
+    dnnl_graph_op_softplus_backward,
+    dnnl_graph_op_sqrt,
+    dnnl_graph_op_sqrt_backward,
+    dnnl_graph_op_square,
+    dnnl_graph_op_squared_difference,
+    dnnl_graph_op_static_reshape,
+    dnnl_graph_op_static_transpose,
+    dnnl_graph_op_subtract,
+    dnnl_graph_op_tanh,
+    dnnl_graph_op_tanh_backward,
+    dnnl_graph_op_type_cast,
+    dnnl_graph_op_wildcard,
+    dnnl_graph_op_hard_sigmoid,
+    dnnl_graph_op_hard_sigmoid_backward,
+    dnnl_graph_op_select,
+    dnnl_graph_op_pow,
+    dnnl_graph_op_group_norm,
+    dnnl_graph_op_gen_index,
+    dnnl_graph_op_greater_equal,
+    dnnl_graph_op_last_symbol,
+} dnnl_graph_op_kind_t;
+
+/// Attributes of operations
+typedef enum {
+    /// Undefined op attribute.
+    dnnl_graph_op_attr_undef = 0,
+
+    // float32 attributes. The value of these attributes can be any single
+    // float32 number.
+
+    /// Specifies an alpha attribute to an op.
+    dnnl_graph_op_attr_alpha = 0x1,
+    /// Specifies a beta attribute to an op.
+    dnnl_graph_op_attr_beta,
+    /// Specifies an epsilon attribute to an op.
+    dnnl_graph_op_attr_epsilon,
+    /// Specifies a max attribute to an op.
+    dnnl_graph_op_attr_max,
+    /// Specifies a min attribute to an op.
+    dnnl_graph_op_attr_min,
+    /// Specifies a momentum attribute to an op.
+    dnnl_graph_op_attr_momentum,
+
+    // float32 vector attributes. The value of these attributes can be a
+    // vector of float32 numbers.
+
+    /// Specifies a scales attribute to an op.
+    dnnl_graph_op_attr_scales = 0x20,
+
+    // int64_t attributes. The value of these attributes can be any single
+    // int64 number.
+
+    /// Specifies an axis attribute to an op.
+    dnnl_graph_op_attr_axis = 0x30,
+    /// Specifies a begin_norm_axis attribute to an op.
+    dnnl_graph_op_attr_begin_norm_axis,
+    /// Specifies a groups attribute to an op.
+    dnnl_graph_op_attr_groups,
+
+    // int64_t vector attributes. The value of these attributes can be a
+    // vector of int64 numbers.
+
+    /// Specifies an axes attribute to an op.
+    dnnl_graph_op_attr_axes = 0x40,
+    /// Specifies a dilations attribute to an op.
+    dnnl_graph_op_attr_dilations,
+    /// Specifies a dst_shape attribute to an op.
+    dnnl_graph_op_attr_dst_shape,
+    /// Specifies a kernel attribute to an op.
+    dnnl_graph_op_attr_kernel,
+    /// Specifies an order attribute to an op.
+    dnnl_graph_op_attr_order,
+    /// Specifies an output_padding attribute to an op.
+    dnnl_graph_op_attr_output_padding,
+    /// Specifies a pads_begin attribute to an op.
+    dnnl_graph_op_attr_pads_begin,
+    /// Specifies a pads_end attribute to an op.
+    dnnl_graph_op_attr_pads_end,
+    /// Specifies a shape attribute to an op.
+    dnnl_graph_op_attr_shape,
+    /// Specifies a sizes attribute to an op.
+    dnnl_graph_op_attr_sizes,
+    /// Specifies a src_shape attribute to an op.
+    dnnl_graph_op_attr_src_shape,
+    /// Specifies a strides attribute to an op.
+    dnnl_graph_op_attr_strides,
+    /// Specifies a weights_shape attribute to an op.
+    dnnl_graph_op_attr_weights_shape,
+    /// Specifies a zps attribute to an op.
+    dnnl_graph_op_attr_zps,
+    /// Specifies a group_shape attribute to an op.
+    dnnl_graph_op_attr_group_shape,
+
+    // bool attributes. The value of these attributes can be any single
+    // bool value.
+
+    /// Specifies an exclude_pad attribute to an op.
+    dnnl_graph_op_attr_exclude_pad = 0x60,
+    /// Specifies a keep_dims attribute to an op.
+    dnnl_graph_op_attr_keep_dims,
+    /// Specifies a keep_stats attribute to an op.
+    dnnl_graph_op_attr_keep_stats,
+    /// Specifies a per_channel_broadcast attribute to an op.
+    dnnl_graph_op_attr_per_channel_broadcast,
+    /// Specifies a special_zero attribute to an op.
+    dnnl_graph_op_attr_special_zero,
+    /// Specifies a transpose_a attribute to an op.
+    dnnl_graph_op_attr_transpose_a,
+    /// Specifies a transpose_b attribute to an op.
+    dnnl_graph_op_attr_transpose_b,
+    /// Specifies a use_affine attribute to an op.
+    dnnl_graph_op_attr_use_affine,
+    /// Specifies a use_dst attribute to an op.
+    dnnl_graph_op_attr_use_dst,
+
+    // string attributes. The value of these attributes can be a string.
+
+    /// Specifies an auto_broadcast attribute to an op. The value can be
+    /// "none" or "numpy".
+    dnnl_graph_op_attr_auto_broadcast = 0x80,
+    /// Specifies an auto_pad attribute to an op. The value can be "none",
+    /// "same_upper", "same_lower", or "valid".
+    dnnl_graph_op_attr_auto_pad,
+    /// Specifies a coordinate_transformation_mode attribute to an op. The
+    /// value can be "half_pixel" or "align_corners". The attribute is
+    /// defined for Interpolate operations.
+    dnnl_graph_op_attr_coordinate_transformation_mode,
+    /// Specifies a data_format of an op. The value can be "NCX" or "NXC".
+    dnnl_graph_op_attr_data_format,
+    /// Specifies a mode attribute of an op. The value can be "nearest",
+    /// "linear", "bilinear", or "trilinear". The attribute is defined for
+    /// Interpolate operations.
+    dnnl_graph_op_attr_mode,
+    /// Specifies a qtype attribute to an op. The value can be
The value can be "per_channel" or + /// "per_tensor". The attribute is defined for quantization operations. + dnnl_graph_op_attr_qtype, + /// Specifies a rounding_type attribute to an op. The value can be "ceil" or + /// "floor". + dnnl_graph_op_attr_rounding_type, + /// Specifies a weights_format of an op. The value can be "OIX", "XIO", + /// "IOX", or "XOI". Different operations may support different values. + dnnl_graph_op_attr_weights_format, + + /// Specifies the end of all above exteral attributes for check. + dnnl_graph_op_attr_end = 0xFF, +} dnnl_graph_op_attr_t; + +/// An opaque structure to describe an operation. +struct dnnl_graph_op; + +/// An operation handle. +typedef struct dnnl_graph_op *dnnl_graph_op_t; + +/// A constant operation handle. +typedef const struct dnnl_graph_op *const_dnnl_graph_op_t; + +/// @} dnnl_graph_api_op + +/// @addtogroup dnnl_graph_api_allocator +/// @{ + +/// Allocation call-back function interface for host. For SYCL allocator, see +/// #dnnl_graph_sycl_allocate_f. +typedef void *(*dnnl_graph_host_allocate_f)(size_t size, size_t alignment); + +/// Deallocation call-back function interface for host. For SYCL allocator, see +/// #dnnl_graph_sycl_deallocate_f. +typedef void (*dnnl_graph_host_deallocate_f)(void *); + +/// An opaque structure to describe an allocator. +struct dnnl_graph_allocator; + +/// An allocator handle. +typedef struct dnnl_graph_allocator *dnnl_graph_allocator_t; + +/// A constant allocator handle. +typedef const struct dnnl_graph_allocator *const_dnnl_graph_allocator_t; + +/// @} dnnl_graph_api_allocator + +/// @addtogroup dnnl_graph_api_compiled_partition +/// @{ + +/// In-place pair definition. It can queried from a compiled partition +/// indicating that an input and an output of the partition can share the same +/// memory buffer for computation. In-place computation helps to reduce the +/// memory footprint and improves cache locality. But since the library may not +/// have a global view of user's application, it's possible that the tensor with +/// `input_id` is used at other places in user's computation graph. In this +/// case, the user should take the in-place pair as a hint and pass a different +/// memory buffer for output tensor to avoid overwriting the input memory buffer +/// which will probably cause unexpected incorrect results. +typedef struct { + /// The id of input tensor + size_t input_id; + + /// The id of output tensor + size_t output_id; +} dnnl_graph_inplace_pair_t; + +/// An opaque structure to describe a compiled partition. +struct dnnl_graph_compiled_partition; + +/// A compiled partition handle. +typedef struct dnnl_graph_compiled_partition *dnnl_graph_compiled_partition_t; + +/// A constant compiled partition handle. +typedef const struct dnnl_graph_compiled_partition + *const_dnnl_graph_compiled_partition_t; + +/// @} dnnl_graph_api_compiled_partition + +/// @addtogroup dnnl_graph_api_tensor +/// @{ + +/// An opaque structure to describe a tensor. +struct dnnl_graph_tensor; + +/// A tensor handle. +typedef struct dnnl_graph_tensor *dnnl_graph_tensor_t; + +/// A constant tensor handle. 
+typedef const struct dnnl_graph_tensor *const_dnnl_graph_tensor_t;
+
+/// @} dnnl_graph_api_tensor
+
+/// @} dnnl_graph_api
+
+/// @} dnnl_api
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl.h
new file mode 100644
index 0000000000000000000000000000000000000000..225dec78608f7bb4b88f0d4d7dcbaa72ce153abd
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl.h
@@ -0,0 +1,276 @@
+/*******************************************************************************
+* Copyright 2020-2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_OCL_H
+#define ONEAPI_DNNL_DNNL_OCL_H
+
+#include "oneapi/dnnl/dnnl.h"
+
+#include "oneapi/dnnl/dnnl_ocl_types.h"
+
+/// @cond DO_NOT_DOCUMENT_THIS
+// Set target version for OpenCL explicitly to suppress a compiler warning.
+#ifndef CL_TARGET_OPENCL_VERSION
+#define CL_TARGET_OPENCL_VERSION 120
+#endif
+
+#include <CL/cl.h>
+/// @endcond
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_ocl_interop
+/// @{
+
+/// Creates a memory object.
+///
+/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE,
+/// the constructed memory object will have the underlying buffer set. In
+/// this case, the buffer will be initialized as if:
+/// - dnnl_memory_set_data_handle() has been called, if @p memory_kind is
+///   equal to dnnl_ocl_interop_usm, or
+/// - dnnl_ocl_interop_memory_set_mem_object() has been called, if
+///   @p memory_kind is equal to dnnl_ocl_interop_buffer.
+///
+/// @param memory Output memory object.
+/// @param memory_desc Memory descriptor.
+/// @param engine Engine to use.
+/// @param memory_kind Memory allocation kind to specify the type of handle.
+/// @param handle Handle of the memory buffer to use as an underlying
+///     storage.
+///     - A USM pointer to the user-allocated buffer. In this case the
+///       library doesn't own the buffer. Requires @p memory_kind to be
+///       equal to dnnl_ocl_interop_usm.
+///     - An OpenCL buffer. In this case the library doesn't own the buffer.
+///       Requires @p memory_kind to be equal to dnnl_ocl_interop_buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p memory_kind for the memory object. In this case the library
+///       owns the buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_memory_create(dnnl_memory_t *memory,
+        const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
+        dnnl_ocl_interop_memory_kind_t memory_kind, void *handle);
+
+#ifdef DNNL_EXPERIMENTAL_SPARSE
+/// Creates a memory object with multiple handles.
+///
+/// @param memory Output memory object.
+/// @param memory_desc Memory descriptor.
+/// @param engine Engine to use.
+/// @param memory_kind Memory allocation kind to specify the type of handles.
+/// @param nhandles Number of handles.
+/// @param handles Handles of the memory buffers to use as underlying
+///     storages. For each element of the @p handles array the following
+///     applies:
+///     - A USM pointer to the user-allocated buffer. In this case the
+///       library doesn't own the buffer. Requires @p memory_kind to be
+///       equal to dnnl_ocl_interop_usm.
+///     - An OpenCL buffer. In this case the library doesn't own the buffer.
+///       Requires @p memory_kind to be equal to dnnl_ocl_interop_buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p memory_kind for the memory object. In this case the library
+///       owns the buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_memory_create_v2(dnnl_memory_t *memory,
+        const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
+        dnnl_ocl_interop_memory_kind_t memory_kind, int nhandles,
+        void **handles);
+#endif
+
+/// Returns the memory allocation kind associated with a memory object.
+///
+/// @param memory Memory to query.
+/// @param memory_kind Output underlying memory allocation kind of the
+///     memory object.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_memory_get_memory_kind(
+        const_dnnl_memory_t memory,
+        dnnl_ocl_interop_memory_kind_t *memory_kind);
+
+/// Returns an OpenCL memory object associated with a memory object.
+///
+/// @param memory Memory object.
+/// @param mem_object Output OpenCL memory object.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_memory_get_mem_object(
+        const_dnnl_memory_t memory, cl_mem *mem_object);
+
+/// Sets the OpenCL memory object associated with a memory object.
+///
+/// For behavioral details, see dnnl_memory_set_data_handle().
+///
+/// @param memory Memory object.
+/// @param mem_object OpenCL memory object.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_memory_set_mem_object(
+        dnnl_memory_t memory, cl_mem mem_object);
+
+/// Retrieves a cache blob ID for the OpenCL device.
+///
+/// @warning
+///     This API is intended to be used with
+///     #dnnl_ocl_interop_engine_get_cache_blob() and
+///     #dnnl_ocl_interop_engine_create_from_cache_blob(). The returned cache
+///     blob ID can only be used as an ID of the cache blob returned by
+///     #dnnl_ocl_interop_engine_get_cache_blob().
+///
+/// @note The cache blob ID can be empty (@p size will be 0 and
+///     @p cache_blob_id will be nullptr) if oneDNN doesn't have anything to
+///     put in the cache blob. (#dnnl_ocl_interop_engine_get_cache_blob will
+///     return an empty cache blob).
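Review note: the memory-object functions above combine in two common ways. A sketch at the C level, where `md`, `eng`, and `ocl_buf` are assumed to be a memory descriptor, an engine, and an existing OpenCL buffer:

```cpp
// Wrap an existing cl_mem: create the memory object without a buffer, then
// attach the user's OpenCL buffer to it.
dnnl_memory_t mem;
dnnl_status_t st = dnnl_ocl_interop_memory_create(
        &mem, md, eng, dnnl_ocl_interop_buffer, DNNL_MEMORY_NONE);
if (st == dnnl_success)
    st = dnnl_ocl_interop_memory_set_mem_object(mem, ocl_buf);

// Alternatively, let the library allocate (and own) the buffer itself.
dnnl_memory_t owned;
st = dnnl_ocl_interop_memory_create(
        &owned, md, eng, dnnl_ocl_interop_buffer, DNNL_MEMORY_ALLOCATE);
```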
+///
+/// @param device An OpenCL device.
+/// @param size Size of the cache blob ID in bytes.
+/// @param cache_blob_id Cache blob ID of size @p size. If
+///     the @p cache_blob_id is nullptr then the size of the cache blob ID
+///     is returned in @p size.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_engine_get_cache_blob_id(
+        cl_device_id device, size_t *size, uint8_t *cache_blob_id);
+
+/// Retrieves a cache blob associated with the given engine.
+///
+/// @note The cache blob can be empty (@p size will be 0 and @p cache_blob
+///     will be nullptr) if oneDNN doesn't have anything to put in the cache
+///     blob. It's the user's responsibility to check whether it's empty
+///     prior to passing it to
+///     #dnnl_ocl_interop_engine_create_from_cache_blob().
+///
+/// @param engine Engine to query for the cache blob.
+/// @param size Size of the cache blob in bytes.
+/// @param cache_blob Cache blob of size @p size. If the @p cache_blob is
+///     nullptr then the size of the cache blob is returned in @p size.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_engine_get_cache_blob(
+        dnnl_engine_t engine, size_t *size, uint8_t *cache_blob);
+
+/// Creates an engine from the given cache blob.
+///
+/// @param engine Output engine.
+/// @param device The OpenCL device that this engine will encapsulate.
+/// @param context The OpenCL context (containing the device) that this
+///     engine will use for all operations.
+/// @param size Size of the cache blob in bytes.
+/// @param cache_blob Cache blob of size @p size.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_engine_create_from_cache_blob(
+        dnnl_engine_t *engine, cl_device_id device, cl_context context,
+        size_t size, const uint8_t *cache_blob);
+
+/// Creates an engine associated with an OpenCL device and an OpenCL context.
+///
+/// @param engine Output engine.
+/// @param device Underlying OpenCL device to use for the engine.
+/// @param context Underlying OpenCL context to use for the engine.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_engine_create(
+        dnnl_engine_t *engine, cl_device_id device, cl_context context);
+
+/// Returns the OpenCL context associated with an engine.
+///
+/// @param engine Engine to query.
+/// @param context Output underlying OpenCL context of the engine.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_engine_get_context(
+        dnnl_engine_t engine, cl_context *context);
+
+/// Returns the OpenCL device associated with an engine.
+///
+/// @param engine Engine to query.
+/// @param device Output underlying OpenCL device of the engine.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_get_device(
+        dnnl_engine_t engine, cl_device_id *device);
+
+/// Creates an execution stream for a given engine associated with
+/// an OpenCL command queue.
+///
+/// @param stream Output execution stream.
+/// @param engine Engine to create the execution stream on.
+/// @param queue OpenCL command queue to use.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_stream_create(
+        dnnl_stream_t *stream, dnnl_engine_t engine, cl_command_queue queue);
+
+/// Returns the OpenCL command queue associated with an execution stream.
+///
+/// @param stream Execution stream to query.
+/// @param queue Output OpenCL command queue.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_stream_get_command_queue(
+        dnnl_stream_t stream, cl_command_queue *queue);
+
+/// Executes computations specified by the primitive in a specified stream
+/// and returns an OpenCL event.
+///
+/// @param primitive Primitive to execute.
+/// @param stream Stream to use.
+/// @param nargs Number of arguments.
+/// @param args Array of arguments. Each argument is an
+///     <index, memory object> pair. The index is one of the `DNNL_ARG_*`
+///     values such as `DNNL_ARG_SRC`. Unless runtime shapes are used (see
+///     #DNNL_RUNTIME_DIM_VAL), the memory object must have the same memory
+///     descriptor as that returned by
+///     #dnnl_primitive_desc_query_md(#dnnl_query_exec_arg_md, index).
+/// @param deps A pointer to a vector of size @p ndeps that contains
+///     dependencies.
+/// @param ndeps Number of dependencies.
+/// @param return_event Output event. It's the user's responsibility to
+///     manage the lifetime of the event. Can be NULL. When @p stream is
+///     in-order, NULL will be returned.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_ocl_interop_primitive_execute(
+        const_dnnl_primitive_t primitive, dnnl_stream_t stream, int nargs,
+        const dnnl_exec_arg_t *args, const cl_event *deps, int ndeps,
+        cl_event *return_event);
+
+/// @} dnnl_api_ocl_interop
+
+/// @} dnnl_api_interop
+
+/// @} dnnl_api
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl.hpp b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..36416511e4f473e647f49a593f0ad1937462ecef
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl.hpp
@@ -0,0 +1,445 @@
+/*******************************************************************************
+* Copyright 2020-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_OCL_HPP
+#define ONEAPI_DNNL_DNNL_OCL_HPP
+
+#include "oneapi/dnnl/dnnl.hpp"
+
+/// @cond DO_NOT_DOCUMENT_THIS
+#include <algorithm>
+#include <cstdlib>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "oneapi/dnnl/dnnl_ocl.h"
+
+#include <CL/cl.h>
+/// @endcond
+
+/// @addtogroup dnnl_api
+/// @{
+
+namespace dnnl {
+
+/// @addtogroup dnnl_api_interop Runtime interoperability API
+/// API extensions to interact with the underlying run-time.
+/// @{
+
+/// @addtogroup dnnl_api_ocl_interop OpenCL interoperability API
+/// API extensions to interact with the underlying OpenCL run-time.
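Review note: to make the event plumbing of the C-level execute call above concrete, a sketch (it assumes `prim`, `strm`, `args`, `nargs`, and two earlier events `ev0`/`ev1` already exist):

```cpp
// Chain the primitive after two earlier OpenCL events and wait on its
// completion event; the caller owns (and must release) the returned event.
cl_event deps[2] = {ev0, ev1};
cl_event done;
dnnl_status_t st = dnnl_ocl_interop_primitive_execute(
        prim, strm, nargs, args, deps, 2, &done);
if (st == dnnl_success) {
    clWaitForEvents(1, &done);
    clReleaseEvent(done);
}
```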
+///
+/// @sa @ref dev_guide_opencl_interoperability in developer guide
+/// @{
+
+/// OpenCL interoperability namespace
+namespace ocl_interop {
+
+/// Memory allocation kind.
+enum class memory_kind {
+    /// USM (device, shared, host, or unknown) memory allocation kind.
+    usm = dnnl_ocl_interop_usm,
+    /// Buffer memory allocation kind - default.
+    buffer = dnnl_ocl_interop_buffer,
+};
+
+/// Converts a memory allocation kind enum value from C++ API to C API type.
+///
+/// @param akind C++ API memory allocation kind enum value.
+/// @returns Corresponding C API memory allocation kind enum value.
+inline dnnl_ocl_interop_memory_kind_t convert_to_c(memory_kind akind) {
+    return static_cast<dnnl_ocl_interop_memory_kind_t>(akind);
+}
+
+/// Returns the cache blob ID of the OpenCL device.
+///
+/// @warning
+///     This API is intended to be used with
+///     #dnnl::ocl_interop::get_engine_cache_blob() and
+///     #dnnl::ocl_interop::make_engine(cl_device_id, cl_context,
+///     const std::vector<uint8_t> &). The returned cache blob ID can only
+///     be used as an ID of the cache blob returned by
+///     #dnnl::ocl_interop::get_engine_cache_blob().
+///
+/// @note The cache blob ID can be empty (@p size will be 0 and
+///     @p cache_blob_id will be nullptr) if oneDNN doesn't have anything to
+///     put in the cache blob. (#dnnl_ocl_interop_engine_get_cache_blob will
+///     return an empty cache blob).
+///
+/// @param device An OpenCL device.
+/// @returns A vector containing the cache blob ID.
+inline std::vector<uint8_t> get_engine_cache_blob_id(cl_device_id device) {
+    size_t size = 0;
+    error::wrap_c_api(
+            dnnl_ocl_interop_engine_get_cache_blob_id(device, &size, nullptr),
+            "could not get an engine cache blob id size");
+
+    std::vector<uint8_t> cache_blob_id(size);
+    error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob_id(
+                              device, &size, cache_blob_id.data()),
+            "could not get an engine cache blob id");
+    return cache_blob_id;
+}
+
+/// Returns a cache blob for the engine.
+///
+/// @note The cache blob vector can be empty if oneDNN doesn't have anything
+///     to put in the cache blob. It's the user's responsibility to check
+///     whether it's empty prior to passing it to
+///     #dnnl::ocl_interop::make_engine(cl_device_id, cl_context,
+///     const std::vector<uint8_t> &)
+///
+/// @param aengine Engine to query for the cache blob.
+/// @returns Vector containing the cache blob.
+inline std::vector<uint8_t> get_engine_cache_blob(const engine &aengine) {
+    size_t size = 0;
+    error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob(
+                              aengine.get(), &size, nullptr),
+            "could not get an engine cache blob size");
+
+    std::vector<uint8_t> cache_blob(size);
+    error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob(
+                              aengine.get(), &size, cache_blob.data()),
+            "could not get an engine cache blob");
+    return cache_blob;
+}
+
+/// Constructs an engine from the given cache blob.
+///
+/// @param device The OpenCL device that this engine will encapsulate.
+/// @param context The OpenCL context (containing the device) that this
+///     engine will use for all operations.
+/// @param cache_blob Cache blob.
+/// @returns An engine.
+inline engine make_engine(cl_device_id device, cl_context context,
+        const std::vector<uint8_t> &cache_blob) {
+    dnnl_engine_t c_engine;
+    error::wrap_c_api(
+            dnnl_ocl_interop_engine_create_from_cache_blob(&c_engine, device,
+                    context, cache_blob.size(), cache_blob.data()),
+            "could not create an engine from cache blob");
+    return engine(c_engine);
+}
+
+/// Constructs an engine from OpenCL device and context objects.
+///
+/// @param device The OpenCL device that this engine will encapsulate.
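Review note: the cache-blob helpers above are designed for a save/restore flow along these lines (a sketch; `dev` and `ctx` are an assumed OpenCL device and context, and the persistence layer is left abstract):

```cpp
// First run: create the engine normally, then persist its cache blob keyed
// by the device's cache blob ID so stale blobs can be detected later.
std::vector<uint8_t> id = dnnl::ocl_interop::get_engine_cache_blob_id(dev);
dnnl::engine eng = dnnl::ocl_interop::make_engine(dev, ctx);
std::vector<uint8_t> blob = dnnl::ocl_interop::get_engine_cache_blob(eng);
// ... store id and blob, e.g. on disk ...

// Later run: rebuild the engine from the blob only if the stored ID still
// matches the current device's ID and the blob is non-empty.
if (!blob.empty()
        && id == dnnl::ocl_interop::get_engine_cache_blob_id(dev))
    eng = dnnl::ocl_interop::make_engine(dev, ctx, blob);
```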
+/// @param context The OpenCL context (containing the device) that this
+///     engine will use for all operations.
+/// @returns An engine.
+inline engine make_engine(cl_device_id device, cl_context context) {
+    dnnl_engine_t c_engine;
+    error::wrap_c_api(
+            dnnl_ocl_interop_engine_create(&c_engine, device, context),
+            "could not create an engine");
+    return engine(c_engine);
+}
+
+/// Returns OpenCL context associated with the engine.
+///
+/// @param aengine An engine.
+/// @returns Underlying OpenCL context.
+inline cl_context get_context(const engine &aengine) {
+    cl_context context = nullptr;
+    error::wrap_c_api(
+            dnnl_ocl_interop_engine_get_context(aengine.get(), &context),
+            "could not get an OpenCL context from an engine");
+    return context;
+}
+
+/// Returns OpenCL device associated with the engine.
+///
+/// @param aengine An engine.
+/// @returns Underlying OpenCL device.
+inline cl_device_id get_device(const engine &aengine) {
+    cl_device_id device = nullptr;
+    error::wrap_c_api(dnnl_ocl_interop_get_device(aengine.get(), &device),
+            "could not get an OpenCL device from an engine");
+    return device;
+}
+
+/// Constructs an execution stream for the specified engine and OpenCL queue.
+///
+/// @param aengine Engine to create the stream on.
+/// @param queue OpenCL queue to use for the stream.
+/// @returns An execution stream.
+inline stream make_stream(const engine &aengine, cl_command_queue queue) {
+    dnnl_stream_t c_stream;
+    error::wrap_c_api(
+            dnnl_ocl_interop_stream_create(&c_stream, aengine.get(), queue),
+            "could not create a stream");
+    return stream(c_stream);
+}
+
+/// Returns OpenCL queue object associated with the execution stream.
+///
+/// @param astream An execution stream.
+/// @returns Underlying OpenCL queue.
+inline cl_command_queue get_command_queue(const stream &astream) {
+    cl_command_queue queue = nullptr;
+    error::wrap_c_api(
+            dnnl_ocl_interop_stream_get_command_queue(astream.get(), &queue),
+            "could not get an OpenCL command queue from a stream");
+    return queue;
+}
+
+/// Returns the OpenCL memory object associated with the memory object.
+///
+/// @param amemory A memory object.
+/// @returns Underlying OpenCL memory object.
+inline cl_mem get_mem_object(const memory &amemory) {
+    cl_mem mem_object;
+    error::wrap_c_api(
+            dnnl_ocl_interop_memory_get_mem_object(amemory.get(), &mem_object),
+            "could not get OpenCL buffer object from a memory object");
+    return mem_object;
+}
+
+/// Sets the OpenCL memory object associated with the memory object.
+///
+/// For behavioral details see memory::set_data_handle().
+///
+/// @param amemory A memory object.
+/// @param mem_object OpenCL cl_mem object to use as the underlying
+///     storage. It must have at least get_desc().get_size() bytes
+///     allocated.
+inline void set_mem_object(memory &amemory, cl_mem mem_object) {
+    error::wrap_c_api(
+            dnnl_ocl_interop_memory_set_mem_object(amemory.get(), mem_object),
+            "could not set OpenCL buffer object from a memory object");
+}
+
+/// Returns the memory allocation kind associated with a memory object.
+///
+/// @param amemory A memory object.
+///
+/// @returns The underlying memory allocation kind of the memory object.
+inline memory_kind get_memory_kind(const memory &amemory) {
+    dnnl_ocl_interop_memory_kind_t ckind;
+    error::wrap_c_api(
+            dnnl_ocl_interop_memory_get_memory_kind(amemory.get(), &ckind),
+            "could not get memory kind");
+    return static_cast<memory_kind>(ckind);
+}
+
+#ifdef DNNL_EXPERIMENTAL_SPARSE
+/// Creates a memory object with multiple handles.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param kind Memory allocation kind to specify the type of handles.
+/// @param handles Handles of the memory buffers to use as underlying
+///     storages. For each element of the @p handles vector the following
+///     applies:
+///     - A USM pointer to the user-allocated buffer. In this case the
+///       library doesn't own the buffer. Requires @p kind to be equal to
+///       memory_kind::usm.
+///     - An OpenCL buffer. In this case the library doesn't own the buffer.
+///       Requires @p kind to be equal to memory_kind::buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p kind for the memory object. In this case the library owns the
+///       buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+///
+/// If the @p handles vector is not provided, the library will allocate all
+/// buffers as if all handles had the special value DNNL_MEMORY_ALLOCATE.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, memory_kind kind,
+        std::vector<void *> handles = {}) {
+    if (handles.empty()) {
+        const int nhandles = memory_desc.get_num_handles();
+        handles.resize(nhandles, DNNL_MEMORY_ALLOCATE);
+    }
+
+    dnnl_memory_t c_memory;
+    error::wrap_c_api(
+            dnnl_ocl_interop_memory_create_v2(&c_memory, memory_desc.get(),
+                    aengine.get(), convert_to_c(kind), (int)handles.size(),
+                    handles.data()),
+            "could not create a memory");
+    return memory(c_memory);
+}
+
+/// Constructs a memory object with multiple OpenCL buffers.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param mem_objects A vector of OpenCL buffers to use.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, std::vector<cl_mem> mem_objects) {
+    const int nhandles = memory_desc.get_num_handles();
+    std::vector<void *> handles(nhandles, DNNL_MEMORY_NONE);
+    memory amemory(memory_desc, aengine, handles);
+    for (int i = 0; i < nhandles; i++)
+        amemory.set_data_handle(mem_objects[i], i);
+    return amemory;
+}
+
+/// Creates a memory object.
+///
+/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE,
+/// the constructed memory object will have the underlying buffer set. In
+/// this case, the buffer will be initialized as if:
+/// - dnnl::memory::set_data_handle() had been called, if @p kind is
+///   equal to dnnl::ocl_interop::memory_kind::usm, or
+/// - dnnl::ocl_interop::set_mem_object() has been called, if @p kind is
+///   equal to dnnl::ocl_interop::memory_kind::buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param kind Memory allocation kind to specify the type of handle.
+/// @param handle Handle of the memory buffer to use as an underlying
+///     storage.
+///     - A USM pointer to the user-allocated buffer. In this case the
+///       library doesn't own the buffer. Requires @p kind to be equal to
+///       dnnl::ocl_interop::memory_kind::usm.
+///     - An OpenCL buffer. In this case the library doesn't own the buffer.
+///       Requires @p kind to be equal to
+///       dnnl::ocl_interop::memory_kind::buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value.
+///       Instructs the library to allocate the buffer that corresponds to
+///       the memory allocation kind @p kind for the memory object. In this
+///       case the library owns the buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, memory_kind kind, void *handle) {
+    return make_memory(
+            memory_desc, aengine, kind, std::vector<void *> {handle});
+}
+
+/// Constructs a memory object from an OpenCL buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param mem_object An OpenCL buffer to use.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, cl_mem mem_object) {
+    return make_memory(
+            memory_desc, aengine, std::vector<cl_mem> {mem_object});
+}
+#else
+
+/// Creates a memory object.
+///
+/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE,
+/// the constructed memory object will have the underlying buffer set. In
+/// this case, the buffer will be initialized as if:
+/// - dnnl::memory::set_data_handle() had been called, if @p kind is
+///   equal to dnnl::ocl_interop::memory_kind::usm, or
+/// - dnnl::ocl_interop::set_mem_object() has been called, if @p kind is
+///   equal to dnnl::ocl_interop::memory_kind::buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param kind Memory allocation kind to specify the type of handle.
+/// @param handle Handle of the memory buffer to use as an underlying
+///     storage.
+///     - A USM pointer to the user-allocated buffer. In this case the
+///       library doesn't own the buffer. Requires @p kind to be equal to
+///       dnnl::ocl_interop::memory_kind::usm.
+///     - An OpenCL buffer. In this case the library doesn't own the buffer.
+///       Requires @p kind to be equal to
+///       dnnl::ocl_interop::memory_kind::buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p kind for the memory object. In this case the library owns the
+///       buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, memory_kind kind,
+        void *handle = DNNL_MEMORY_ALLOCATE) {
+    dnnl_memory_t c_memory;
+    error::wrap_c_api(
+            dnnl_ocl_interop_memory_create(&c_memory, memory_desc.get(),
+                    aengine.get(), convert_to_c(kind), handle),
+            "could not create a memory");
+    return memory(c_memory);
+}
+
+/// Constructs a memory object from an OpenCL buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param mem_object An OpenCL buffer to use.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, cl_mem mem_object) {
+    memory amemory(memory_desc, aengine, DNNL_MEMORY_NONE);
+    set_mem_object(amemory, mem_object);
+    return amemory;
+}
+#endif
+
+/// Executes computations specified by the primitive in a specified stream
+/// and returns an OpenCL event.
+///
+/// Arguments are passed via an arguments map containing
+/// <index, memory object> pairs.
+/// The index must be one of the `DNNL_ARG_*` values such as `DNNL_ARG_SRC`,
+/// and the memory must have a memory descriptor matching the one returned
+/// by #dnnl::primitive_desc::query_md(#query::exec_arg_md, index) unless
+/// using dynamic shapes (see #DNNL_RUNTIME_DIM_VAL).
+///
+/// @param aprimitive Primitive to execute.
+/// @param astream Stream object. The stream must belong to the same engine
+///     as the primitive.
+/// @param args Arguments map.
+/// @param deps Optional vector with `cl_event` dependencies.
+///
+/// @returns Output event. It's the user's responsibility to manage the
+///     lifetime of the event.
+inline cl_event execute(const dnnl::primitive &aprimitive,
+        const stream &astream, const std::unordered_map<int, memory> &args,
+        const std::vector<cl_event> &deps = {}) {
+    std::vector<dnnl_exec_arg_t> c_args;
+    c_args.reserve(args.size());
+    for (const auto &a : args)
+        c_args.push_back({a.first, a.second.get()});
+
+    const cl_event *c_deps = deps.empty() ? nullptr : deps.data();
+
+    cl_event return_event;
+    error::wrap_c_api(dnnl_ocl_interop_primitive_execute(aprimitive.get(),
+                              astream.get(), (int)c_args.size(),
+                              c_args.data(), c_deps, (int)deps.size(),
+                              &return_event),
+            "could not execute a primitive");
+    return return_event;
+}
+
+} // namespace ocl_interop
+
+/// @} dnnl_api_ocl_interop
+
+/// @} dnnl_api_interop
+
+} // namespace dnnl
+
+/// @} dnnl_api
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl_types.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..168e1259dd8d60c23640bdf405334eda6482841a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ocl_types.h
@@ -0,0 +1,51 @@
+/*******************************************************************************
+* Copyright 2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_OCL_TYPES_H
+#define ONEAPI_DNNL_DNNL_OCL_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_ocl_interop
+/// @{
+
+/// Memory allocation kind.
+typedef enum {
+    /// USM (device, shared, host, or unknown) memory allocation kind.
+    dnnl_ocl_interop_usm,
+    /// Buffer memory allocation kind - default.
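Review note: a short usage sketch of the ocl_interop::execute wrapper defined above, chaining a primitive after one prior event (`prim`, `strm`, `src`, `dst`, and `prior` are assumed to exist):

```cpp
std::unordered_map<int, dnnl::memory> args
        = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}};

// Execute with one dependency; release the returned event once waited on.
cl_event done = dnnl::ocl_interop::execute(prim, strm, args, {prior});
clWaitForEvents(1, &done);
clReleaseEvent(done);
```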
+    dnnl_ocl_interop_buffer,
+} dnnl_ocl_interop_memory_kind_t;
+
+/// @} dnnl_api_ocl_interop
+
+/// @} dnnl_api_interop
+
+/// @} dnnl_api
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl.h
new file mode 100644
index 0000000000000000000000000000000000000000..09f7d632a8ab51ed5ee5a4898fac6bbaef309cab
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl.h
@@ -0,0 +1,199 @@
+/*******************************************************************************
+* Copyright 2020-2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_SYCL_H
+#define ONEAPI_DNNL_DNNL_SYCL_H
+
+#include "oneapi/dnnl/dnnl.h"
+
+#include "oneapi/dnnl/dnnl_sycl_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_sycl_interop
+/// @{
+
+/// Creates an engine associated with a SYCL device and a SYCL context.
+///
+/// @param engine Output engine.
+/// @param device Pointer to the SYCL device to use for the engine.
+/// @param context Pointer to the SYCL context to use for the engine.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_engine_create(
+        dnnl_engine_t *engine, const void *device, const void *context);
+
+/// Returns the SYCL context associated with an engine.
+///
+/// @param engine Engine to query.
+/// @param context Pointer to the underlying SYCL context of the engine.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_engine_get_context(
+        dnnl_engine_t engine, void **context);
+
+/// Returns the SYCL device associated with an engine.
+///
+/// @param engine Engine to query.
+/// @param device Pointer to the underlying SYCL device of the engine.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_engine_get_device(
+        dnnl_engine_t engine, void **device);
+
+/// Creates a memory object.
+///
+/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE,
+/// the constructed memory object will have the underlying buffer set. In
+/// this case, the buffer will be initialized as if:
+/// - dnnl_memory_set_data_handle() had been called, if @p memory_kind is
+///   equal to dnnl_sycl_interop_usm, or
+/// - dnnl_sycl_interop_memory_set_buffer() has been called, if
+///   @p memory_kind is equal to dnnl_sycl_interop_buffer.
+///
+/// @param memory Output memory object.
+/// @param memory_desc Memory descriptor.
+/// @param engine Engine to use.
+/// @param memory_kind Memory allocation kind to specify the type of handle.
+/// @param handle Handle of the memory buffer to use as an underlying storage.
+///     - A USM pointer to the user-allocated buffer. In this case the library
+///       doesn't own the buffer. Requires @p memory_kind to be equal to
+///       dnnl_sycl_interop_usm.
+///     - A pointer to a SYCL buffer. In this case the library doesn't own the
+///       buffer. Requires @p memory_kind to be equal to
+///       dnnl_sycl_interop_buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p memory_kind for the memory object. In this case the library
+///       owns the buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_memory_create(dnnl_memory_t *memory,
+        const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
+        dnnl_sycl_interop_memory_kind_t memory_kind, void *handle);
+
+#ifdef DNNL_EXPERIMENTAL_SPARSE
+/// Creates a memory object with multiple handles.
+///
+/// @param memory Output memory object.
+/// @param memory_desc Memory descriptor.
+/// @param engine Engine to use.
+/// @param memory_kind Memory allocation kind to specify the type of handles.
+/// @param nhandles Number of handles.
+/// @param handles Handles of the memory buffers to use as underlying storages.
+///     For each element of the @p handles array the following applies:
+///     - A USM pointer to the user-allocated buffer. In this case the library
+///       doesn't own the buffer. Requires @p memory_kind to be equal to
+///       dnnl_sycl_interop_usm.
+///     - A pointer to a SYCL buffer. In this case the library doesn't own the
+///       buffer. Requires @p memory_kind to be equal to
+///       dnnl_sycl_interop_buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p memory_kind for the memory object. In this case the library
+///       owns the buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
dnnl_status_t DNNL_API dnnl_sycl_interop_memory_create_v2(dnnl_memory_t *memory,
+        const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
+        dnnl_sycl_interop_memory_kind_t memory_kind, int nhandles,
+        void **handles);
+#endif
+
+/// Returns the memory allocation kind associated with a memory object.
+///
+/// @param memory Memory to query.
+/// @param memory_kind Output underlying memory allocation kind of the memory
+///     object.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_memory_get_memory_kind(
+        const_dnnl_memory_t memory,
+        dnnl_sycl_interop_memory_kind_t *memory_kind);
+
+/// Sets a SYCL buffer for a memory object.
+///
+/// @param memory Memory object.
+/// @param buffer SYCL buffer to be set in the memory object.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_memory_set_buffer(
+        dnnl_memory_t memory, void *buffer);
+
+/// Creates an execution stream for a given engine associated with a SYCL
+/// queue.
+///
+/// @param stream Output execution stream.
+/// @param engine Engine to create the execution stream on.
+/// @param queue SYCL queue to use.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_stream_create(
+        dnnl_stream_t *stream, dnnl_engine_t engine, void *queue);
+
+/// Returns the SYCL queue associated with an execution stream.
+///
+/// @param stream Execution stream to query.
+/// @param queue Output SYCL command queue.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_stream_get_queue(
+        dnnl_stream_t stream, void **queue);
+
+/// Executes computations specified by the primitive in a specified stream and
+/// returns a SYCL event.
+///
+/// @param primitive Primitive to execute.
+/// @param stream Stream to use.
+/// @param nargs Number of arguments.
+/// @param args Array of arguments. Each argument is an
+///     <index, #dnnl_memory_t> pair. The index is one of the `DNNL_ARG_*`
+///     values such as `DNNL_ARG_SRC`. Unless runtime shapes are used (see
+///     #DNNL_RUNTIME_DIM_VAL), the memory object must have the same memory
+///     descriptor as that returned by
+///     #dnnl_primitive_desc_query_md(#dnnl_query_exec_arg_md, index).
+/// @param deps A pointer to a std::vector<sycl::event> that contains
+///     dependencies.
+/// @param return_event Output event.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_sycl_interop_primitive_execute(
+        const_dnnl_primitive_t primitive, dnnl_stream_t stream, int nargs,
+        const dnnl_exec_arg_t *args, const void *deps, void *return_event);
+
+/// @} dnnl_api_sycl_interop
+
+/// @} dnnl_api_interop
+
+/// @} dnnl_api
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl.hpp b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c5844e9bbbdfe0c0e5900da89374c9842d24ffbf
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl.hpp
@@ -0,0 +1,384 @@
+/*******************************************************************************
+* Copyright 2020-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_SYCL_HPP
+#define ONEAPI_DNNL_DNNL_SYCL_HPP
+
+/// @cond DO_NOT_DOCUMENT_THIS
+#include <algorithm>
+#include <cstdlib>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#error "Unsupported compiler"
+#endif
+
+#include "oneapi/dnnl/dnnl.hpp"
+#include "oneapi/dnnl/dnnl_sycl.h"
+
+/// @endcond
+
+/// @addtogroup dnnl_api
+/// @{
+
+namespace dnnl {
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_sycl_interop SYCL interoperability API
+/// API extensions to interact with the underlying SYCL run-time.
+///
+/// @sa @ref dev_guide_dpcpp_interoperability in developer guide
+/// @{
+
+/// SYCL interoperability namespace
+namespace sycl_interop {
+
+/// Memory allocation kind.
+enum class memory_kind {
+    /// USM (device, shared, host, or unknown) memory allocation kind - default.
+    usm = dnnl_sycl_interop_usm,
+    /// Buffer memory allocation kind.
+    buffer = dnnl_sycl_interop_buffer,
+};
+
+/// Converts a memory allocation kind enum value from C++ API to C API type.
+///
+/// @param akind C++ API memory allocation kind enum value.
+/// @returns Corresponding C API memory allocation kind enum value.
+inline dnnl_sycl_interop_memory_kind_t convert_to_c(memory_kind akind) {
+    return static_cast<dnnl_sycl_interop_memory_kind_t>(akind);
+}
+
+/// Constructs an engine from SYCL device and context objects.
+///
+/// @param adevice SYCL device.
+/// @param acontext SYCL context.
+///
+/// @returns Created engine.
+inline engine make_engine(
+        const sycl::device &adevice, const sycl::context &acontext) {
+    dnnl_engine_t aengine;
+    error::wrap_c_api(dnnl_sycl_interop_engine_create(&aengine,
+                              static_cast<const void *>(&adevice),
+                              static_cast<const void *>(&acontext)),
+            "could not create an engine");
+    return engine(aengine);
+}
+
+/// Returns the SYCL context associated with an engine.
+///
+/// @param aengine Engine to query.
+///
+/// @returns The underlying SYCL context of the engine.
+inline sycl::context get_context(const engine &aengine) {
+    void *ctx_ptr;
+    error::wrap_c_api(
+            dnnl_sycl_interop_engine_get_context(aengine.get(), &ctx_ptr),
+            "could not get a context handle");
+    auto ctx = *static_cast<sycl::context *>(ctx_ptr);
+    return ctx;
+}
+
+/// Returns the SYCL device associated with an engine.
+///
+/// @param aengine Engine to query.
+///
+/// @returns The underlying SYCL device of the engine.
+inline sycl::device get_device(const engine &aengine) {
+    void *dev_ptr;
+    error::wrap_c_api(
+            dnnl_sycl_interop_engine_get_device(aengine.get(), &dev_ptr),
+            "could not get a device handle");
+    auto dev = *static_cast<sycl::device *>(dev_ptr);
+    return dev;
+}
+
+/// Creates an execution stream for a given engine associated with a SYCL
+/// queue.
+///
+/// @param aengine Engine object to use for the stream.
+/// @param aqueue SYCL queue to use for the stream.
+///
+/// @returns An execution stream.
+inline stream make_stream(const engine &aengine, sycl::queue &aqueue) {
+    dnnl_stream_t astream;
+    error::wrap_c_api(
+            dnnl_sycl_interop_stream_create(&astream, aengine.get(), &aqueue),
+            "could not create a stream");
+    return stream(astream);
+}
+
+/// Returns the SYCL queue associated with an execution stream.
+///
+/// @param astream Execution stream to query.
+///
+/// @returns SYCL queue object.
+inline sycl::queue get_queue(const stream &astream) {
+    void *queue_ptr;
+    error::wrap_c_api(
+            dnnl_sycl_interop_stream_get_queue(astream.get(), &queue_ptr),
+            "could not get a stream handle");
+    auto queue = *static_cast<sycl::queue *>(queue_ptr);
+    return queue;
+}
+
+/// Returns the SYCL buffer associated with a memory object.
+///
+/// Throws an exception if the memory allocation kind associated with the
+/// memory object is not equal to dnnl::sycl_interop::memory_kind::buffer.
+///
+/// @tparam T Type of the requested buffer.
+/// @tparam ndims Number of dimensions of the requested buffer.
+/// @param amemory Memory object.
+///
+/// @returns SYCL buffer associated with the memory object.
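+///
+/// A minimal usage sketch (assuming a memory object @c amemory that was
+/// created with dnnl::sycl_interop::memory_kind::buffer and holds f32 data):
+/// ~~~cpp
+/// // Reinterpret the underlying storage as a 1D float buffer and read it
+/// // back on the host.
+/// sycl::buffer<float, 1> buf = dnnl::sycl_interop::get_buffer<float>(amemory);
+/// sycl::host_accessor acc(buf, sycl::read_only);
+/// float first_value = acc[0];
+/// ~~~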
+template <typename T, int ndims = 1>
+sycl::buffer<T, ndims> get_buffer(const memory &amemory) {
+    static_assert(ndims == 1, "only 1D buffers supported");
+
+    // XXX: workaround: when CPU runtime is not SYCL and amemory was created
+    // for CPU engine `get_buffer` should return an error. Use interop API to
+    // implement the check.
+    dnnl_sycl_interop_memory_kind_t ckind;
+    error::wrap_c_api(
+            dnnl_sycl_interop_memory_get_memory_kind(amemory.get(), &ckind),
+            "could not get SYCL buffer object");
+
+    void *handle_ptr;
+    error::wrap_c_api(dnnl_memory_get_data_handle(amemory.get(), &handle_ptr),
+            "could not get SYCL buffer object");
+
+    // XXX: workaround: zero-range buffer cannot be constructed.
+    if (!handle_ptr) return sycl::buffer<T, ndims>(sycl::range<1>(1));
+
+    // The library stores the handle as a 1D uint8_t buffer; reinterpret it
+    // into the requested element type.
+    auto &buf_u8 = *static_cast<sycl::buffer<uint8_t, 1> *>(handle_ptr);
+
+    auto range = sycl::range<1>(buf_u8.byte_size() / sizeof(T));
+    return buf_u8.reinterpret<T, ndims>(range);
+}
+
+/// Sets the SYCL buffer associated with a memory object.
+///
+/// @tparam T Type of the buffer.
+/// @tparam ndims Number of dimensions of the buffer.
+/// @param amemory Memory object to change.
+/// @param abuffer SYCL buffer.
+template <typename T, int ndims>
+void set_buffer(memory &amemory, sycl::buffer<T, ndims> &abuffer) {
+    auto range = sycl::range<1>(abuffer.byte_size());
+    auto buf_u8 = abuffer.template reinterpret<uint8_t, 1>(range);
+    error::wrap_c_api(dnnl_sycl_interop_memory_set_buffer(
+                              amemory.get(), static_cast<void *>(&buf_u8)),
+            "could not set SYCL buffer object");
+}
+
+/// Returns the memory allocation kind associated with a memory object.
+///
+/// @param amemory A memory object.
+///
+/// @returns The underlying memory allocation kind of the memory object.
+inline memory_kind get_memory_kind(const memory &amemory) {
+    dnnl_sycl_interop_memory_kind_t ckind;
+    error::wrap_c_api(
+            dnnl_sycl_interop_memory_get_memory_kind(amemory.get(), &ckind),
+            "could not get memory kind");
+    return static_cast<memory_kind>(ckind);
+}
+
+#ifdef DNNL_EXPERIMENTAL_SPARSE
+/// Creates a memory object with multiple handles.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param kind Memory allocation kind to specify the type of handles.
+/// @param handles Handles of the memory buffers to use as underlying storages.
+///     For each element of the @p handles array the following applies:
+///     - A USM pointer to the user-allocated buffer. In this case the library
+///       doesn't own the buffer. Requires @p kind to be equal to
+///       dnnl::sycl_interop::memory_kind::usm.
+///     - A pointer to a SYCL buffer. In this case the library doesn't own the
+///       buffer. Requires @p kind to be equal to
+///       dnnl::sycl_interop::memory_kind::buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p kind for the memory object. In this case the library owns the
+///       buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+///
+/// If the @p handles vector is not provided, the library will allocate all
+/// buffers as if all handles have the special value DNNL_MEMORY_ALLOCATE.
+///
+/// @returns Created memory object.
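+///
+/// A brief usage sketch (assuming an engine @c eng created for the SYCL
+/// runtime and a memory descriptor @c md; the names are illustrative):
+/// ~~~cpp
+/// // Omit the handles vector so the library allocates every underlying
+/// // buffer itself (equivalent to passing DNNL_MEMORY_ALLOCATE handles).
+/// dnnl::memory mem = dnnl::sycl_interop::make_memory(
+///         md, eng, dnnl::sycl_interop::memory_kind::usm);
+/// ~~~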
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, memory_kind kind,
+        std::vector<void *> handles = {}) {
+    if (handles.empty()) {
+        // Default to library-owned allocations for every handle.
+        const int nhandles = memory_desc.get_num_handles();
+        handles.resize(nhandles, DNNL_MEMORY_ALLOCATE);
+    }
+
+    dnnl_memory_t c_memory;
+    error::wrap_c_api(
+            dnnl_sycl_interop_memory_create_v2(&c_memory, memory_desc.get(),
+                    aengine.get(), convert_to_c(kind), (int)handles.size(),
+                    handles.data()),
+            "could not create a memory");
+    return memory(c_memory);
+}
+
+/// Creates a memory object.
+///
+/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
+/// constructed memory object will have the underlying buffer set. In this
+/// case, the buffer will be initialized as if:
+/// - dnnl::memory::set_data_handle() had been called, if @p kind is
+///   equal to dnnl::sycl_interop::memory_kind::usm, or
+/// - dnnl::sycl_interop::set_buffer() had been called, if @p kind is
+///   equal to dnnl::sycl_interop::memory_kind::buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param kind Memory allocation kind to specify the type of handle.
+/// @param handle Handle of the memory buffer to use as an underlying storage.
+///     - A USM pointer to the user-allocated buffer. In this case the library
+///       doesn't own the buffer. Requires @p kind to be equal to
+///       dnnl::sycl_interop::memory_kind::usm.
+///     - A pointer to a SYCL buffer. In this case the library doesn't own the
+///       buffer. Requires @p kind to be equal to
+///       dnnl::sycl_interop::memory_kind::buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p kind for the memory object. In this case the library owns the
+///       buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, memory_kind kind, void *handle) {
+    return make_memory(
+            memory_desc, aengine, kind, std::vector<void *> {handle});
+}
+#else
+
+/// Creates a memory object.
+///
+/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
+/// constructed memory object will have the underlying buffer set. In this
+/// case, the buffer will be initialized as if:
+/// - dnnl::memory::set_data_handle() had been called, if @p kind is
+///   equal to dnnl::sycl_interop::memory_kind::usm, or
+/// - dnnl::sycl_interop::set_buffer() had been called, if @p kind is
+///   equal to dnnl::sycl_interop::memory_kind::buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param kind Memory allocation kind to specify the type of handle.
+/// @param handle Handle of the memory buffer to use as an underlying storage.
+///     - A USM pointer to the user-allocated buffer. In this case the library
+///       doesn't own the buffer. Requires @p kind to be equal to
+///       dnnl::sycl_interop::memory_kind::usm.
+///     - A pointer to a SYCL buffer. In this case the library doesn't own the
+///       buffer. Requires @p kind to be equal to
+///       dnnl::sycl_interop::memory_kind::buffer.
+///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
+///       allocate the buffer that corresponds to the memory allocation kind
+///       @p kind for the memory object. In this case the library owns the
+///       buffer.
+///     - The DNNL_MEMORY_NONE special value. Instructs the library to
+///       create a memory object without an underlying buffer.
+///
+/// @returns Created memory object.
+inline memory make_memory(const memory::desc &memory_desc,
+        const engine &aengine, memory_kind kind,
+        void *handle = DNNL_MEMORY_ALLOCATE) {
+    dnnl_memory_t c_memory;
+    error::wrap_c_api(
+            dnnl_sycl_interop_memory_create(&c_memory, memory_desc.get(),
+                    aengine.get(), convert_to_c(kind), handle),
+            "could not create a memory");
+    return memory(c_memory);
+}
+#endif
+
+/// Constructs a memory object from a SYCL buffer.
+///
+/// @param memory_desc Memory descriptor.
+/// @param aengine Engine to use.
+/// @param abuffer A SYCL buffer to use.
+///
+/// @returns Created memory object.
+template <typename T, int ndims>
+memory make_memory(const memory::desc &memory_desc, const engine &aengine,
+        sycl::buffer<T, ndims> &abuffer) {
+    memory amemory(memory_desc, aengine, DNNL_MEMORY_NONE);
+    set_buffer(amemory, abuffer);
+    return amemory;
+}
+
+/// Executes computations specified by the primitive in a specified stream and
+/// returns a SYCL event.
+///
+/// Arguments are passed via an arguments map containing
+/// <index, memory object> pairs. The index must be one of the `DNNL_ARG_*`
+/// values such as `DNNL_ARG_SRC`, and the memory must have a memory descriptor
+/// matching the one returned by
+/// #dnnl::primitive_desc::query_md(#query::exec_arg_md, index) unless using
+/// dynamic shapes (see #DNNL_RUNTIME_DIM_VAL).
+///
+/// @param aprimitive Primitive to execute.
+/// @param astream Stream object. The stream must belong to the same engine
+///     as the primitive.
+/// @param args Arguments map.
+/// @param deps Optional vector with `sycl::event` dependencies.
+///
+/// @returns Output event.
+inline sycl::event execute(const dnnl::primitive &aprimitive,
+        const stream &astream, const std::unordered_map<int, memory> &args,
+        const std::vector<sycl::event> &deps = {}) {
+    // Convert the C++ argument map into the flat array the C API expects.
+    std::vector<dnnl_exec_arg_t> c_args;
+    c_args.reserve(args.size());
+    for (const auto &a : args)
+        c_args.push_back({a.first, a.second.get()});
+
+    sycl::event return_event;
+    error::wrap_c_api(
+            dnnl_sycl_interop_primitive_execute(aprimitive.get(), astream.get(),
+                    (int)c_args.size(), c_args.data(), &deps, &return_event),
+            "could not execute a primitive");
+    return return_event;
+}
+
+} // namespace sycl_interop
+
+/// @} dnnl_api_sycl_interop
+
+/// @} dnnl_api_interop
+
+} // namespace dnnl
+
+/// @} dnnl_api
+
+#endif // ONEAPI_DNNL_DNNL_SYCL_HPP
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl_types.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..979d4cddaad34d3980ceb07ad785bc3f00d49cf6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_sycl_types.h
@@ -0,0 +1,51 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_SYCL_TYPES_H
+#define ONEAPI_DNNL_DNNL_SYCL_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_sycl_interop
+/// @{
+
+/// Memory allocation kind.
+typedef enum {
+    /// USM (device, shared, host, or unknown) memory allocation kind - default.
+    dnnl_sycl_interop_usm,
+    /// Buffer memory allocation kind.
+    dnnl_sycl_interop_buffer,
+} dnnl_sycl_interop_memory_kind_t;
+
+/// @} dnnl_api_sycl_interop
+
+/// @} dnnl_api_interop
+
+/// @} dnnl_api
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool.h
new file mode 100644
index 0000000000000000000000000000000000000000..e6a931b702e3b8cba222782d3f5d656074986243
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool.h
@@ -0,0 +1,118 @@
+/*******************************************************************************
+* Copyright 2020-2022 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_THREADPOOL_H
+#define ONEAPI_DNNL_DNNL_THREADPOOL_H
+
+#include "oneapi/dnnl/dnnl_config.h"
+#include "oneapi/dnnl/dnnl_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_threadpool_interop
+/// @{
+
+/// Creates an execution stream with specified threadpool.
+///
+/// @sa @ref dev_guide_threadpool
+///
+/// @param stream Output execution stream.
+/// @param engine Engine to create the execution stream on.
+/// @param threadpool Pointer to an instance of a C++ class that implements
+///     the dnnl::threadpool_interop::threadpool_iface interface.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_threadpool_interop_stream_create(
+        dnnl_stream_t *stream, dnnl_engine_t engine, void *threadpool);
+
+/// Returns a threadpool to be used by the execution stream.
+///
+/// @sa @ref dev_guide_threadpool
+///
+/// @param astream Execution stream.
+/// @param threadpool Output pointer to an instance of a C++ class that
+///     implements the dnnl::threadpool_interop::threadpool_iface interface.
+///     Set to NULL if the stream was created without threadpool.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise.
+dnnl_status_t DNNL_API dnnl_threadpool_interop_stream_get_threadpool(
+        dnnl_stream_t astream, void **threadpool);
+
+/// Sets the maximum concurrency assumed by oneDNN when outside a
+/// parallel call.
+///
+/// @param max_concurrency The maximum concurrency assumed by oneDNN
+///     when outside a parallel call. This is a thread-local setting.
+/// @returns #dnnl_success on success and a status describing the +/// error otherwise. +dnnl_status_t DNNL_API dnnl_threadpool_interop_set_max_concurrency( + int max_concurrency); + +/// Gets the maximum concurrency assumed by oneDNN when outside a +/// parallel call. +/// +/// @param max_concurrency The maximum concurrency assumed by oneDNN +/// when outside a parallel call. This is a threadlocal setting. +/// @returns #dnnl_success on success and a status describing the +/// error otherwise. +dnnl_status_t DNNL_API dnnl_threadpool_interop_get_max_concurrency( + int *max_concurrency); + +/// @copydoc dnnl_sgemm() +/// @param threadpool A pointer to a threadpool interface (only when built with +/// the THREADPOOL CPU runtime). +dnnl_status_t DNNL_API dnnl_threadpool_interop_sgemm(char transa, char transb, + dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, float alpha, const float *A, + dnnl_dim_t lda, const float *B, dnnl_dim_t ldb, float beta, float *C, + dnnl_dim_t ldc, void *threadpool); + +/// @copydoc dnnl_gemm_u8s8s32() +/// @param threadpool A pointer to a threadpool interface (only when built with +/// the THREADPOOL CPU runtime). +dnnl_status_t DNNL_API dnnl_threadpool_interop_gemm_u8s8s32(char transa, + char transb, char offsetc, dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, + float alpha, const uint8_t *A, dnnl_dim_t lda, uint8_t ao, + const int8_t *B, dnnl_dim_t ldb, int8_t bo, float beta, int32_t *C, + dnnl_dim_t ldc, const int32_t *co, void *threadpool); + +/// @copydoc dnnl_gemm_s8s8s32() +/// @param threadpool A pointer to a threadpool interface (only when built with +/// the THREADPOOL CPU runtime). +dnnl_status_t DNNL_API dnnl_threadpool_interop_gemm_s8s8s32(char transa, + char transb, char offsetc, dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, + float alpha, const int8_t *A, dnnl_dim_t lda, int8_t ao, + const int8_t *B, dnnl_dim_t ldb, int8_t bo, float beta, int32_t *C, + dnnl_dim_t ldc, const int32_t *co, void *threadpool); + +/// @} dnnl_api_threadpool_interop + +/// @} dnnl_api_interop + +/// @} dnnl_api + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool.hpp b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9d9e804e14080f3e70f0c4364b298eaff97aedae --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool.hpp @@ -0,0 +1,113 @@ +/******************************************************************************* +* Copyright 2020-2025 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_THREADPOOL_HPP
+#define ONEAPI_DNNL_DNNL_THREADPOOL_HPP
+
+#include "oneapi/dnnl/dnnl.hpp"
+#include "oneapi/dnnl/dnnl_threadpool.h"
+
+#include "oneapi/dnnl/dnnl_threadpool_iface.hpp"
+
+/// @addtogroup dnnl_api
+/// @{
+
+namespace dnnl {
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_threadpool_interop Threadpool interoperability API
+/// API extensions to interact with the underlying Threadpool run-time.
+/// @{
+
+/// Threadpool interoperability namespace
+namespace threadpool_interop {
+
+/// Constructs an execution stream for the specified engine and threadpool.
+///
+/// @sa @ref dev_guide_threadpool
+///
+/// @param aengine Engine to create the stream on.
+/// @param threadpool Pointer to an instance of a C++ class that implements
+///     the dnnl::threadpool_interop::threadpool_iface interface.
+/// @returns An execution stream.
+inline dnnl::stream make_stream(
+        const dnnl::engine &aengine, threadpool_iface *threadpool) {
+    dnnl_stream_t c_stream;
+    dnnl::error::wrap_c_api(dnnl_threadpool_interop_stream_create(
+                                    &c_stream, aengine.get(), threadpool),
+            "could not create stream");
+    return dnnl::stream(c_stream);
+}
+
+/// Returns the pointer to a threadpool that is used by an execution stream.
+///
+/// @sa @ref dev_guide_threadpool
+///
+/// @param astream An execution stream.
+/// @returns Output pointer to an instance of a C++ class that implements
+///     the dnnl::threadpool_interop::threadpool_iface interface, or NULL if
+///     the stream was created without threadpool.
+inline threadpool_iface *get_threadpool(const dnnl::stream &astream) {
+    void *tp;
+    dnnl::error::wrap_c_api(
+            dnnl_threadpool_interop_stream_get_threadpool(astream.get(), &tp),
+            "could not get stream threadpool");
+    return static_cast<threadpool_iface *>(tp);
+}
+
+/// @copydoc dnnl_threadpool_interop_sgemm()
+inline status sgemm(char transa, char transb, dnnl_dim_t M, dnnl_dim_t N,
+        dnnl_dim_t K, float alpha, const float *A, dnnl_dim_t lda,
+        const float *B, dnnl_dim_t ldb, float beta, float *C, dnnl_dim_t ldc,
+        threadpool_iface *threadpool) {
+    return static_cast<status>(dnnl_threadpool_interop_sgemm(transa, transb, M,
+            N, K, alpha, A, lda, B, ldb, beta, C, ldc, threadpool));
+}
+
+/// @copydoc dnnl_threadpool_interop_gemm_u8s8s32()
+inline status gemm_u8s8s32(char transa, char transb, char offsetc, dnnl_dim_t M,
+        dnnl_dim_t N, dnnl_dim_t K, float alpha, const uint8_t *A,
+        dnnl_dim_t lda, uint8_t ao, const int8_t *B, dnnl_dim_t ldb, int8_t bo,
+        float beta, int32_t *C, dnnl_dim_t ldc, const int32_t *co,
+        threadpool_iface *threadpool) {
+    return static_cast<status>(dnnl_threadpool_interop_gemm_u8s8s32(transa,
+            transb, offsetc, M, N, K, alpha, A, lda, ao, B, ldb, bo, beta, C,
+            ldc, co, threadpool));
+}
+
+/// @copydoc dnnl_threadpool_interop_gemm_s8s8s32()
+inline status gemm_s8s8s32(char transa, char transb, char offsetc, dnnl_dim_t M,
+        dnnl_dim_t N, dnnl_dim_t K, float alpha, const int8_t *A,
+        dnnl_dim_t lda, int8_t ao, const int8_t *B, dnnl_dim_t ldb, int8_t bo,
+        float beta, int32_t *C, dnnl_dim_t ldc, const int32_t *co,
+        threadpool_iface *threadpool) {
+    return static_cast<status>(dnnl_threadpool_interop_gemm_s8s8s32(transa,
+            transb, offsetc, M, N, K, alpha, A, lda, ao, B, ldb, bo, beta, C,
+            ldc, co, threadpool));
+}
+
+} // namespace threadpool_interop
+
+/// @} dnnl_api_threadpool_interop
+
+/// @} dnnl_api_interop
+
+} // namespace dnnl
+
+/// @} dnnl_api
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool_iface.hpp b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool_iface.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..192952e9e55f48614fe1b6709c7f3383e6e53c55
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_threadpool_iface.hpp
@@ -0,0 +1,73 @@
+/*******************************************************************************
+* Copyright 2020-2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_THREADPOOL_IFACE_HPP
+#define ONEAPI_DNNL_DNNL_THREADPOOL_IFACE_HPP
+
+#include <cstdint>
+#include <functional>
+
+/// @addtogroup dnnl_api
+/// @{
+
+namespace dnnl {
+
+/// @addtogroup dnnl_api_interop
+/// @{
+
+/// @addtogroup dnnl_api_threadpool_interop
+/// @{
+
+namespace threadpool_interop {
+
+/// Abstract threadpool interface. The users are expected to subclass this
+/// interface and pass an object to the library during CPU stream creation or
+/// directly in case of BLAS functions.
+struct threadpool_iface {
+    /// Returns the number of worker threads.
+    virtual int get_num_threads() const = 0;
+
+    /// Returns true if the calling thread belongs to this threadpool.
+    virtual bool get_in_parallel() const = 0;
+
+    /// Submits n instances of a closure for execution in parallel:
+    ///
+    /// for (int i = 0; i < n; i++) fn(i, n);
+    ///
+    virtual void parallel_for(int n, const std::function<void(int, int)> &fn)
+            = 0;
+
+    /// Returns threadpool behavior flags bit mask (see below).
+    virtual uint64_t get_flags() const = 0;
+
+    /// If set, parallel_for() returns immediately, and oneDNN must itself
+    /// implement waiting for the submitted closures to finish execution.
+    static constexpr uint64_t ASYNCHRONOUS = 1;
+
+    virtual ~threadpool_iface() {}
+};
+
+} // namespace threadpool_interop
+
+/// @} dnnl_api_threadpool_interop
+
+/// @} dnnl_api_interop
+
+} // namespace dnnl
+
+/// @} dnnl_api
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_types.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..0f9507e95a9924577f767a474410edb158c3131d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_types.h
@@ -0,0 +1,2936 @@
+/*******************************************************************************
+* Copyright 2016-2025 Intel Corporation
+* Copyright 2024 FUJITSU LIMITED
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/// @file +/// C API types definitions + +#ifndef ONEAPI_DNNL_DNNL_TYPES_H +#define ONEAPI_DNNL_DNNL_TYPES_H + +#ifdef __cplusplus +extern "C" { +#endif + +/// @cond DO_NOT_DOCUMENT_THIS +#include +#include +/// @endcond + +#include "oneapi/dnnl/dnnl_config.h" + +#include "oneapi/dnnl/dnnl_common_types.h" + +/// @addtogroup dnnl_api +/// @{ + +/// @addtogroup dnnl_api_memory +/// @{ + +/// Memory format kind +typedef enum { + /// Undefined memory format kind, used for empty memory descriptors. + dnnl_format_kind_undef = 0, + /// A special format kind that indicates that the actual format will be + /// selected by a primitive automatically. + dnnl_format_kind_any, + /// A tensor in a generic format described by the stride and blocking + /// values in each dimension. + dnnl_blocked, + /// A special format kind that indicates that tensor format is opaque. + dnnl_format_kind_opaque, +#ifdef DNNL_EXPERIMENTAL_SPARSE + /// Format kind for sparse tensors. + dnnl_format_kind_sparse, +#endif + /// Parameter to allow internal only format kinds without undefined + /// behavior. This parameter is chosen to be valid for so long as + /// sizeof(int) >= 2. + dnnl_format_kind_max = 0x7fff, +} dnnl_format_kind_t; + +#ifdef DNNL_EXPERIMENTAL_SPARSE +/// Sparse encodings. +typedef enum { + /// Undefined sparse encoding kind, used for empty memory descriptors. + dnnl_sparse_encoding_undef = 0, + /// Compressed Sparse Row (CSR) encoding. + dnnl_csr, + /// An encoding that is used for an opaque storage schema for + /// tensors with unstructured sparsity. A memory descriptor with the + /// packed encoding cannot be used to create a memory object. It can + /// only be used to create a primitive descriptor to query the + /// actual memory descriptor (similar to the format tag `any`). + dnnl_packed, + /// Coordinate Sparse Encoding (COO). + dnnl_coo, +} dnnl_sparse_encoding_t; +#endif + +#ifdef DNNL_EXPERIMENTAL_PROFILING +/// Profiling data kind. +typedef enum { + /// Undefined profiling data kind. + dnnl_profiling_data_kind_undef = 0, + /// Data kind to query an execution time in nanoseconds. + dnnl_profiling_data_kind_time, +} dnnl_profiling_data_kind_t; + +#endif + +/// Memory format tag specification. +/// +/// oneDNN formats describe physical data layout. The physical layout +/// is described as a sequence of the dimensions as they are laid out in the +/// memory (from the outer-most to the inner-most). Note that this order +/// doesn't affect the logical order of the dimensions that is kept in the +/// `dims` field of the dnnl_memory_desc_t structure. The logical order of the +/// dimensions is specified by the primitive that uses the tensor. +/// +/// For example, CNN 5D tensor always has its logical dimensions in the order +/// `(batch, channels, depth, height, width)`, while the physical layout might be +/// `NCDHW` (corresponds to #dnnl_ncdhw format tag) or +/// `NDHWC` (corresponds to #dnnl_ndhwc format tag). 
+///
+/// ~~~cpp
+/// int batch = 2, channels = 16, depth = 13, height = 13, width = 13;
+///
+/// int ndims = 5; // 5D tensor
+/// dnnl_dims_t dims = {batch, channels, depth, height, width};
+/// dnnl_memory_desc_t data_in_ncdhw;
+/// dnnl_memory_desc_create_with_tag(
+///         &data_in_ncdhw, 5, dims, dnnl_f32, dnnl_ncdhw);
+///
+/// // note that in both cases dims passed are the same
+/// dnnl_memory_desc_t data_in_ndhwc;
+/// dnnl_memory_desc_create_with_tag(
+///         &data_in_ndhwc, 5, dims, dnnl_f32, dnnl_ndhwc);
+///
+/// dnnl_memory_desc_destroy(data_in_ncdhw);
+/// dnnl_memory_desc_destroy(data_in_ndhwc);
+/// ~~~
+///
+/// Memory format tags can be further divided into two categories:
+/// - Domain-agnostic names, i.e. names that do not depend on the tensor usage
+///   in the specific primitive. These names use letters from `a` to `l` to
+///   denote logical dimensions from 1 to 12, and form the order in which the
+///   dimensions are laid out in memory. For instance, #dnnl_ab is used to
+///   denote a 2D tensor where the second logical dimension (aka `b`) is the
+///   innermost, i.e. has stride = 1, and the first logical dimension (`a`) is
+///   laid out in memory with a stride equal to the size of the second
+///   dimension. On the other hand, #dnnl_ba is just a transposed version of
+///   the same tensor: the first dimension (`a`) becomes the innermost one.
+/// - Domain-specific names, i.e. names that make sense only in the context of
+///   a certain domain, such as CNN. These names are just aliases to the
+///   corresponding domain-agnostic tags and are used mostly for convenience.
+///   For example, #dnnl_nc is used to denote a 2D CNN activations tensor
+///   memory format, where channels are the innermost dimension and batch is
+///   the outermost one. Moreover, #dnnl_nc is just an alias to #dnnl_ab,
+///   since for oneDNN CNN primitives the logical dimensions of
+///   activations tensors come in order: batch, channels, spatial.
+///   In other words, batch corresponds to the first logical dimension (`a`),
+///   and channels correspond to the second one (`b`).
+///
+/// The following domain-specific notation applies to memory format tags:
+/// - @c 'n' denotes the mini-batch dimension
+/// - @c 'c' denotes a channels dimension
+/// - When there are multiple channel dimensions (for example, in convolution
+///   weights tensor), @c 'i' and @c 'o' denote dimensions of input and output
+///   channels
+/// - @c 'd', @c 'h', and @c 'w' denote spatial depth, height, and width
+///   respectively
+///
+/// Upper-case letters indicate that the data is laid out in blocks for a
+/// particular dimension. In such cases, the format name contains both upper-
+/// and lower-case letters for that dimension with a lower-case letter preceded
+/// by the block size. For example: #dnnl_nChw8c describes a format where the
+/// outermost dimension is mini-batch, followed by the channel block number,
+/// followed by the spatial height and width, and finally followed by 8-element
+/// channel blocks.
+///
+/// @sa @ref dev_guide_understanding_memory_formats
+typedef enum {
+    /// Undefined memory format tag
+    dnnl_format_tag_undef = 0,
+    /// Undefined memory format tag.
+    /// The primitive selects a format automatically.
+    dnnl_format_tag_any,
+
+    // Semantic agnostic section
+    // The physical order of dimensions is defined by the permutation of the
+    // characters, assuming that ab..z defines the natural order.
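+    //
+    // As an illustrative sketch (the values here are assumed for the
+    // example): for a dense 2D tensor with dims = {M, N}, the dnnl_ab tag
+    // corresponds to strides {N, 1} (row-major, `b` innermost), while
+    // dnnl_ba corresponds to strides {1, M} (column-major, `a` innermost).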
+ + // Plain formats + + dnnl_a, ///< plain 1D tensor + dnnl_ab, ///< plain 2D tensor + dnnl_abc, ///< plain 3D tensor + dnnl_abcd, ///< plain 4D tensor + dnnl_abcde, ///< plain 5D tensor + dnnl_abcdef, ///< plain 6D tensor + dnnl_abcdefg, ///< plain 7D tensor + dnnl_abcdefgh, ///< plain 8D tensor + dnnl_abcdefghi, ///< plain 9D tensor + dnnl_abcdefghij, ///< plain 10D tensor + dnnl_abcdefghijk, ///< plain 11D tensor + dnnl_abcdefghijkl, ///< plain 12D tensor + + // Permuted plain formats + + dnnl_ba, ///< permuted 2D tensor + dnnl_acb, ///< permuted 3D tensor + dnnl_bac, ///< permuted 3D tensor + dnnl_bca, ///< permuted 3D tensor + dnnl_cab, ///< permuted 3D tensor + dnnl_cba, ///< permuted 3D tensor + dnnl_abdc, ///< permuted 4D tensor + dnnl_acbd, ///< permuted 4D tensor + dnnl_acdb, ///< permuted 4D tensor + dnnl_adbc, ///< permuted 4D tensor + dnnl_adcb, ///< permuted 4D tensor + dnnl_bacd, ///< permuted 4D tensor + dnnl_bcda, ///< permuted 4D tensor + dnnl_cdab, ///< permuted 4D tensor + dnnl_cdba, ///< permuted 4D tensor + dnnl_dcab, ///< permuted 4D tensor + dnnl_abced, ///< permuted 5D tensor + dnnl_abdec, ///< permuted 5D tensor + dnnl_acbde, ///< permuted 5D tensor + dnnl_acdeb, ///< permuted 5D tensor + dnnl_adecb, ///< permuted 5D tensor + dnnl_bacde, ///< permuted 5D tensor + dnnl_bcdea, ///< permuted 5D tensor + dnnl_cdeab, ///< permuted 5D tensor + dnnl_cdeba, ///< permuted 5D tensor + dnnl_decab, ///< permuted 5D tensor + dnnl_abcdfe, ///< permuted 6D tensor + dnnl_abdefc, ///< permuted 6D tensor + dnnl_abdfce, ///< permuted 6D tensor + dnnl_acbdef, ///< permuted 6D tensor + dnnl_adefcb, ///< permuted 6D tensor + dnnl_defcab, ///< permuted 6D tensor + dnnl_abcdegf, ///< permuted 7D tensor + dnnl_abcdefhg, ///< permuted 8D tensor + dnnl_abcdefgih, ///< permuted 9D tensor + dnnl_abcdefghji, ///< permuted 10D tensor + dnnl_abcdefghikj, ///< permuted 11D tensor + dnnl_abcdefghijlk, ///< permuted 12D tensor + + // Opaque blocked formats + + dnnl_Abc16a, + dnnl_ABc16a16b, + dnnl_ABc32a32b, + dnnl_ABc4a4b, + /// 3D tensor blocked by 2nd dimension with block size 16 + dnnl_aBc16b, + dnnl_ABc16b16a, + dnnl_Abc4a, + /// 3D tensor blocked by 2nd dimension with block size 32 + dnnl_aBc32b, + /// 3D tensor blocked by 2nd dimension with block size 4 + dnnl_aBc4b, + dnnl_ABc4b16a4b, + dnnl_ABc2b8a4b, + dnnl_ABc16b16a4b, + dnnl_ABc16b16a2b, + dnnl_ABc4b4a, + dnnl_ABc8a16b2a, + dnnl_ABc8a8b, + dnnl_ABc8a4b, + /// 3D tensor blocked by 2nd dimension with block size 8 + dnnl_aBc8b, + dnnl_ABc8b16a2b, + dnnl_BAc8a16b2a, + dnnl_ABc8b8a, + dnnl_Abcd16a, + dnnl_Abcd8a, + dnnl_ABcd16a16b, + dnnl_Abcd32a, + dnnl_ABcd32a32b, + /// 4D tensor blocked by 2nd dimension with block size 16 + dnnl_aBcd16b, + dnnl_ABcd16b16a, + dnnl_aBCd16b16c, + dnnl_aBCd16c16b, + dnnl_Abcd4a, + /// 4D tensor blocked by 2nd dimension with block size 32 + dnnl_aBcd32b, + /// 4D tensor blocked by 2nd dimension with block size 4 + dnnl_aBcd4b, + dnnl_ABcd4b16a4b, + dnnl_ABcd16b16a4b, + dnnl_ABcd16b16a2b, + dnnl_ABcd4b4a, + dnnl_ABcd4a4b, + dnnl_aBCd2c4b2c, + dnnl_aBCd4b8c2b, + dnnl_aBCd4c16b4c, + dnnl_aBCd2c8b4c, + dnnl_aBCd16c16b4c, + dnnl_aBCd16c16b2c, + dnnl_aBCd4c4b, + dnnl_aBCd4b4c, + dnnl_ABcd8a16b2a, + dnnl_ABcd2b8a4b, + dnnl_ABcd8a8b, + dnnl_ABcd8a4b, + /// 4D tensor blocked by 2nd dimension with block size 8 + dnnl_aBcd8b, + dnnl_aBCd4c8b2c, + dnnl_ABcd8b16a2b, + dnnl_aBCd8b16c2b, + dnnl_BAcd8a16b2a, + /// 4D tensor blocked by 1st and 2nd dimension with block size 8 + dnnl_ABcd8b8a, + dnnl_aBCd8b8c, + dnnl_aBCd8b4c, 
+ dnnl_aBCd8c16b2c, + dnnl_ABcde8a16b2a, + dnnl_aCBd8b16c2b, + dnnl_aBCd8c8b, + dnnl_Abcde16a, + dnnl_Abcde32a, + dnnl_ABcde16a16b, + dnnl_BAcde8a16b2a, + /// 4D tensor blocked by 3rd dimension with block size 4 + dnnl_aBCd2b4c2b, + /// 5D tensor blocked by 1st dimension with block size 16 + dnnl_ABcde4b16a4b, + /// 5D tensor blocked by 1st dimension with block size 8 + dnnl_ABcde2b8a4b, + /// 5D tensor blocked by 2nd dimension with block size 16 + dnnl_aBcde16b, + dnnl_ABcde16b16a, + dnnl_aBCde16b16c, + dnnl_aBCde16c16b, + dnnl_aBCde2c8b4c, + dnnl_Abcde4a, + /// 5D tensor blocked by 2nd dimension with block size 32 + dnnl_aBcde32b, + /// 5D tensor blocked by 2nd dimension with block size 4 + dnnl_aBcde4b, + dnnl_ABcde4b4a, + dnnl_ABcde4a4b, + dnnl_aBCde4b4c, + dnnl_aBCde2c4b2c, + dnnl_aBCde4b8c2b, + dnnl_aBCde4c16b4c, + dnnl_aBCde16c16b4c, + dnnl_aBCde16c16b2c, + dnnl_aBCde4c4b, + dnnl_Abcde8a, + dnnl_ABcde8a8b, + dnnl_ABcde8a4b, + dnnl_BAcde16b16a, + /// 5D tensor blocked by 2nd dimension with block size 8 + dnnl_aBcde8b, + dnnl_ABcde8b16a2b, + dnnl_aBCde8b16c2b, + dnnl_aBCde4c8b2c, + dnnl_aCBde8b16c2b, + dnnl_ABcde8b8a, + dnnl_ABcde32a32b, + dnnl_aBCde8b8c, + dnnl_aBCde8b4c, + dnnl_ABc4a8b8a4b, + dnnl_ABcd4a8b8a4b, + dnnl_ABcde4a8b8a4b, + dnnl_BAc4b8a8b4a, + dnnl_BAcd4b8a8b4a, + dnnl_BAcde4b8a8b4a, + dnnl_ABcd2a8b8a2b, + dnnl_aBCd4b8c8b4c, + dnnl_aBCde4b8c8b4c, + dnnl_aBCde2b8c8b2c, + dnnl_aBCde8c16b2c, + dnnl_aBCde8c8b, + /// 5D tensor blocked by 3rd dimension with block size 4 + dnnl_aBCde2b4c2b, + /// 6D tensor blocked by 2nd dimension with block size 16 + dnnl_aBcdef16b, + dnnl_aBCdef16b16c, + dnnl_aBCdef16c16b, + dnnl_aBCdef4c16b4c, + /// 6D tensor blocked by 2nd dimension with block size 8 + dnnl_aBCdef2c8b4c, + dnnl_aBCdef4c8b2c, + /// 6D tensor blocked by 3rd dimension with block size 4 + dnnl_aBCdef2b4c2b, + /// 6D tensor blocked by 2nd dimension with block size 4 + dnnl_aBcdef4b, + dnnl_aBCdef4c4b, + dnnl_aBCdef4b4c, + dnnl_aBCdef2c4b2c, + dnnl_aBCdef4b8c2b, + dnnl_aBCdef8b8c, + dnnl_aBCdef8b4c, + dnnl_aBCdef8c16b2c, + dnnl_aBCdef4b8c8b4c, + dnnl_aBCdef8b16c2b, + dnnl_aCBdef8b16c2b, + dnnl_aBCdef8c8b, + dnnl_aBdc16b, + dnnl_aBdC16b2c, + dnnl_aBdC16b4c, + dnnl_aBdc4b, + dnnl_aBdc8b, + dnnl_aBdec16b, + dnnl_aBdeC16b2c, + dnnl_aBdeC16b4c, + dnnl_aBdec32b, + dnnl_aBdec4b, + dnnl_aBdec8b, + dnnl_aBdefc16b, + dnnl_aBdefC16b2c, + dnnl_aCBdef16c16b, + dnnl_aBdefc4b, + dnnl_aBdefc8b, + dnnl_Abcdef16a, + dnnl_Abcdef32a, + dnnl_aBedc16b, + dnnl_Acb16a, + dnnl_AcB16a2b, + dnnl_AcB16a4b, + dnnl_Acb4a, + dnnl_Acb8a, + dnnl_aCBd16b16c, + dnnl_aCBd16c16b, + dnnl_aCBde16b16c, + dnnl_aCBde16c16b, + dnnl_Acdb16a, + dnnl_AcdB16a2b, + dnnl_AcdB16a4b, + dnnl_Acdb32a, + dnnl_Acdb4a, + dnnl_Acdb8a, + dnnl_Acdeb16a, + dnnl_AcdeB16a2b, + dnnl_Acdeb4a, + dnnl_Acdeb8a, + dnnl_Adcb16a, + dnnl_BAc16a16b, + dnnl_BAc16b16a, + dnnl_BAcd16a16b, + dnnl_BAcd16b16a, + dnnl_aCBd4c8b8c4b, + dnnl_aCBde4c8b8c4b, + dnnl_aCBdef4c8b8c4b, + dnnl_BAcde16a16b, + dnnl_aCBdef16b16c, + dnnl_ABc16b32a, + dnnl_ABc16b64a, + dnnl_ABc4b32a4b, + dnnl_ABc4b64a4b, + dnnl_ABc8b32a2b, + dnnl_ABc8b64a2b, + dnnl_AB16b16a, + dnnl_AB16b32a, + dnnl_AB16b64a, + dnnl_AB8b16a2b, + dnnl_AB8b32a2b, + dnnl_AB8b64a2b, + dnnl_AB4b16a4b, + dnnl_AB4b32a4b, + dnnl_AB4b64a4b, + dnnl_AB16b16a4b, + dnnl_ABcd16b32a, + dnnl_ABcd16b64a, + dnnl_ABcd4b32a4b, + dnnl_ABcd4b64a4b, + dnnl_ABcd8b32a2b, + dnnl_ABcd8b64a2b, + dnnl_ABcde4b32a4b, + dnnl_ABcde4b64a4b, + dnnl_ABcde16b16a4b, + dnnl_ABcde16b16a2b, + dnnl_ABcde16b32a, + dnnl_ABcde16b64a, + 
dnnl_ABcde8b32a2b, + dnnl_ABcde8b64a2b, + dnnl_aBCdef16c16b4c, + dnnl_aBCdef16c16b2c, + dnnl_AB32a32b8a4b, + dnnl_AB8a4b, + dnnl_AB32a32b8a2b, + dnnl_AB8a2b, + dnnl_abDc32d, + dnnl_abDC32d4c, + dnnl_abdEc32e, + dnnl_abdEC32e2c, + dnnl_abdEC32e4c, + dnnl_aBdefC16b4c, + dnnl_AcdeB16a4b, + dnnl_ABcd16a16b2a, + dnnl_ABc16a16b2a, + dnnl_aBCd16b16c2b, + dnnl_aBCde16b16c2b, + dnnl_Acb32a, + dnnl_AcB32a2b, + dnnl_AcB32a4b, + dnnl_Acb48a, + dnnl_AcB48a2b, + dnnl_AcB48a4b, + dnnl_Acb64a, + dnnl_AcB64a2b, + dnnl_AcB64a4b, + dnnl_cBa2b, + dnnl_cBa4b, + dnnl_aBdc32b, + dnnl_aBdC32b2c, + dnnl_aBdC32b4c, + dnnl_aBdc48b, + dnnl_aBdC48b2c, + dnnl_aBdC48b4c, + dnnl_aBdc64b, + dnnl_aBdC64b2c, + dnnl_aBdC64b4c, + dnnl_adCb2c, + dnnl_adCb4c, + dnnl_AcdB32a2b, + dnnl_AcdB32a4b, + dnnl_Acdb48a, + dnnl_AcdB48a2b, + dnnl_AcdB48a4b, + dnnl_Acdb64a, + dnnl_AcdB64a2b, + dnnl_AcdB64a4b, + dnnl_cdBa2b, + dnnl_cdBa4b, + dnnl_aBdeC32b2c, + dnnl_aBdeC32b4c, + dnnl_aBdec48b, + dnnl_aBdeC48b2c, + dnnl_aBdeC48b4c, + dnnl_aBdec64b, + dnnl_aBdeC64b2c, + dnnl_aBdeC64b4c, + dnnl_adeCb2c, + dnnl_adeCb4c, + dnnl_Acdeb32a, + dnnl_AcdeB32a2b, + dnnl_AcdeB32a4b, + dnnl_Acdeb48a, + dnnl_AcdeB48a2b, + dnnl_AcdeB48a4b, + dnnl_Acdeb64a, + dnnl_AcdeB64a2b, + dnnl_AcdeB64a4b, + dnnl_cdeBa2b, + dnnl_cdeBa4b, + dnnl_aBdefc32b, + dnnl_aBdefC32b2c, + dnnl_aBdefC32b4c, + dnnl_aBdefc48b, + dnnl_aBdefC48b2c, + dnnl_aBdefC48b4c, + dnnl_aBdefc64b, + dnnl_aBdefC64b2c, + dnnl_aBdefC64b4c, + dnnl_adefCb2c, + dnnl_adefCb4c, + dnnl_AB16b32a4b, + dnnl_AB16b48a4b, + dnnl_AB16b64a4b, + dnnl_AB16b16a2b, + dnnl_AB16b32a2b, + dnnl_AB16b48a2b, + dnnl_AB16b64a2b, + dnnl_ABc16b32a4b, + dnnl_ABc16b48a4b, + dnnl_ABc16b64a4b, + dnnl_ABc16b32a2b, + dnnl_ABc16b48a2b, + dnnl_ABc16b64a2b, + dnnl_ABcd16b32a4b, + dnnl_ABcd16b48a4b, + dnnl_ABcd16b64a4b, + dnnl_ABcd16b32a2b, + dnnl_ABcd16b48a2b, + dnnl_ABcd16b64a2b, + dnnl_ABcde16b32a4b, + dnnl_ABcde16b48a4b, + dnnl_ABcde16b64a4b, + dnnl_ABcde16b32a2b, + dnnl_ABcde16b48a2b, + dnnl_ABcde16b64a2b, + dnnl_ABc32a16b, + dnnl_ABcd32a16b, + dnnl_ABcde32a16b, + dnnl_AB48a16b, + dnnl_AB48a32b, + dnnl_ABc40a16b, + dnnl_ABc40a32b, + dnnl_aBC48b16c, + dnnl_aBC48b32c, + dnnl_ABcd40a16b, + dnnl_ABcd40a32b, + dnnl_abCd32c, + dnnl_abdCe32c, + dnnl_abdCE32c2e, + dnnl_BA16a16b2a, + dnnl_BA16a32b2a, + dnnl_BA16a48b2a, + dnnl_BA16a64b2a, + dnnl_BA16a16b4a, + dnnl_BA16a32b4a, + dnnl_BA16a48b4a, + dnnl_BA16a64b4a, + dnnl_ABcd8a2b, + dnnl_aBdeC16c16b2c, + dnnl_aBdeC16c16b4c, + dnnl_aBdefC16c16b2c, + dnnl_AcB16b16a2b, + dnnl_AcB16b16a4b, + dnnl_AcdB16b16a2b, + dnnl_AcdB16b16a4b, + dnnl_AcdeB16b16a2b, + dnnl_aBdefC16c16b4c, + dnnl_AcdeB16b16a4b, + dnnl_AcB16b32a2b, + dnnl_AcB16b32a4b, + dnnl_AcB16b48a2b, + dnnl_AcB16b48a4b, + dnnl_AcB16b64a2b, + dnnl_AcB16b64a4b, + dnnl_aBdC16c16b2c, + dnnl_aBdC16c16b4c, + dnnl_aBdC16c32b2c, + dnnl_aBdC16c32b4c, + dnnl_aBdC16c48b2c, + dnnl_aBdC16c48b4c, + dnnl_aBdC16c64b2c, + dnnl_aBdC16c64b4c, + dnnl_AcdB16b32a2b, + dnnl_AcdB16b32a4b, + dnnl_AcdB16b48a2b, + dnnl_AcdB16b48a4b, + dnnl_AcdB16b64a2b, + dnnl_AcdB16b64a4b, + dnnl_aBdeC16c32b2c, + dnnl_aBdeC16c32b4c, + dnnl_aBdeC16c48b2c, + dnnl_aBdeC16c48b4c, + dnnl_aBdeC16c64b2c, + dnnl_aBdeC16c64b4c, + dnnl_AcdeB16b32a2b, + dnnl_AcdeB16b32a4b, + dnnl_AcdeB16b48a2b, + dnnl_AcdeB16b48a4b, + dnnl_AcdeB16b64a2b, + dnnl_AcdeB16b64a4b, + dnnl_aBdefC16c32b2c, + dnnl_aBdefC16c32b4c, + dnnl_aBdefC16c48b2c, + dnnl_aBdefC16c48b4c, + dnnl_aBdefC16c64b2c, + dnnl_aBdefC16c64b4c, + dnnl_decbA16a, + dnnl_ABc4a2b, + dnnl_ABc8a2b, + dnnl_aBCd8b2c, + dnnl_ABcde4a2b, + dnnl_ABcde8a2b, + 
dnnl_ABcde40a16b, + dnnl_ABcde40a32b, + dnnl_aBCde8b2c, + dnnl_ABcde4a8b8a2b, + dnnl_ABcd4a8b8a2b, + dnnl_ABc4a8b8a2b, + dnnl_aBCdef4b8c8b2c, + dnnl_aBCde4b8c8b2c, + dnnl_aBCd4b8c8b2c, + dnnl_BAcde4b8a8b2a, + dnnl_BAcd4b8a8b2a, + dnnl_BAc4b8a8b2a, + dnnl_aCBdef4c8b8c2b, + dnnl_aCBde4c8b8c2b, + dnnl_aCBd4c8b8c2b, + dnnl_aBCdef8b2c, + dnnl_AB32a16b, + dnnl_AB32a32b, + dnnl_BA4b8a8b2a, + dnnl_BA4b8a8b4a, + dnnl_aBC32b16c, + dnnl_aBC32b32c, + dnnl_aCB4c8b8c2b, + dnnl_aCB4c8b8c4b, + dnnl_ABcd4a2b, + dnnl_ABc2b8a16b4a, + dnnl_ABcd2b8a16b4a, + dnnl_ABcde2b8a16b4a, + dnnl_ABc2a8b16a4b, + dnnl_ABc2a8b16a2b, + dnnl_ABc2b32a8b, + dnnl_ABcd2a8b16a4b, + dnnl_ABcd2a8b16a2b, + dnnl_aCBd2c8b16c2b, + dnnl_ABcd2b32a8b, + dnnl_aBCd2c8b16c2b, + dnnl_ABcde2a8b16a4b, + dnnl_ABcde2a8b16a2b, + dnnl_aCBde2c8b16c2b, + dnnl_ABcde2b32a8b, + dnnl_aBC2b8c16b2c, + dnnl_aBCd2b8c16b2c, + dnnl_aBCde2b8c16b2c, + dnnl_aBCdef2b8c16b2c, + dnnl_BAcde2b8a16b4a, + dnnl_BAcd2b8a16b4a, + dnnl_BAc2b8a16b4a, + dnnl_BAcde2b8a16b2a, + dnnl_BAcd2b8a16b2a, + dnnl_BAc2b8a16b2a, + dnnl_aBCde2c8b16c2b, + dnnl_aBCdef2c8b16c2b, + dnnl_aCBdef2c8b16c2b, + dnnl_aBCd2b8c16b4c, + dnnl_aBCde2b8c16b4c, + dnnl_BA4b8a16b2a, + dnnl_BA4b8a16b4a, + dnnl_aCB4c8b16c2b, + dnnl_aCB4c8b16c4b, + dnnl_BA16a16b, + dnnl_BA16a32b, + dnnl_BA16a48b, + dnnl_BA16a64b, + dnnl_aCB16c2b, + dnnl_aCB16c4b, + dnnl_BA16b2a, + dnnl_BA16b4a, + dnnl_aBC16b16c, + dnnl_aBC16b32c, + dnnl_AB16a16b, + dnnl_AB16a32b, + dnnl_ABcde16a16b2a, + dnnl_aBCdef16b16c2b, + dnnl_Acedb16a, + dnnl_aBdfec16b, + dnnl_abdEC64e2c, + dnnl_abdEC64e4c, + dnnl_aCB16b16c, + dnnl_aCB16b32c, + dnnl_aCB16b48c, + dnnl_aCB16b64c, + dnnl_aCB16b16c2b, + dnnl_aCB16b32c2b, + dnnl_aCB16b48c2b, + dnnl_aCB16b64c2b, + dnnl_aCB16b16c4b, + dnnl_aCB16b32c4b, + dnnl_aCB16b48c4b, + dnnl_aCB16b64c4b, + dnnl_abCd4c, + dnnl_abCde4c, + dnnl_abCdef4c, + dnnl_abCde32c, + dnnl_abCdef32c, + dnnl_ABcd16a32b, + dnnl_decbA8a, + dnnl_aCdefB16b32c2b, + dnnl_aCdefB16b32c4b, + dnnl_aCdefB16b48c2b, + dnnl_aCdefB16b48c4b, + dnnl_aCdefB16b64c2b, + dnnl_aCdefB16b64c4b, + dnnl_BcdeA16a32b2a, + dnnl_BcdeA16a32b4a, + dnnl_BcdeA16a48b2a, + dnnl_BcdeA16a48b4a, + dnnl_BcdeA16a64b2a, + dnnl_BcdeA16a64b4a, + dnnl_aCdefb32c, + dnnl_aCdefB32c2b, + dnnl_aCdefB32c4b, + dnnl_aCdefb48c, + dnnl_aCdefB48c2b, + dnnl_aCdefB48c4b, + dnnl_aCdefb64c, + dnnl_aCdefB64c2b, + dnnl_aCdefB64c4b, + dnnl_Bcdea32b, + dnnl_BcdeA32b2a, + dnnl_BcdeA32b4a, + dnnl_Bcdea48b, + dnnl_BcdeA48b2a, + dnnl_BcdeA48b4a, + dnnl_Bcdea64b, + dnnl_BcdeA64b2a, + dnnl_BcdeA64b4a, + dnnl_Bca32b, + dnnl_BcA32b2a, + dnnl_BcA32b4a, + dnnl_Bca48b, + dnnl_BcA48b2a, + dnnl_BcA48b4a, + dnnl_Bca64b, + dnnl_BcA64b2a, + dnnl_BcA64b4a, + dnnl_aCdb32c, + dnnl_aCdB32c2b, + dnnl_aCdB32c4b, + dnnl_aCdb48c, + dnnl_aCdB48c2b, + dnnl_aCdB48c4b, + dnnl_aCdb64c, + dnnl_aCdB64c2b, + dnnl_aCdB64c4b, + dnnl_BcA16a16b2a, + dnnl_BcA16a16b4a, + dnnl_BcdA16a16b2a, + dnnl_BcdA16a16b4a, + dnnl_BcdeA16a16b2a, + dnnl_BcdeA16a16b4a, + dnnl_aCdB16b16c2b, + dnnl_aCdB16b16c4b, + dnnl_aCdeB16b16c2b, + dnnl_aCdeB16b16c4b, + dnnl_aCdefB16b16c2b, + dnnl_aCdefB16b16c4b, + dnnl_BcA16a32b2a, + dnnl_BcA16a32b4a, + dnnl_BcA16a48b2a, + dnnl_BcA16a48b4a, + dnnl_BcA16a64b2a, + dnnl_BcA16a64b4a, + dnnl_aCdB16b32c2b, + dnnl_aCdB16b32c4b, + dnnl_aCdB16b48c2b, + dnnl_aCdB16b48c4b, + dnnl_aCdB16b64c2b, + dnnl_aCdB16b64c4b, + dnnl_BcdA16a32b2a, + dnnl_BcdA16a32b4a, + dnnl_BcdA16a48b2a, + dnnl_BcdA16a48b4a, + dnnl_BcdA16a64b2a, + dnnl_BcdA16a64b4a, + dnnl_aCdeB16b32c2b, + dnnl_aCdeB16b32c4b, + dnnl_aCdeB16b48c2b, + dnnl_aCdeB16b48c4b, + 
dnnl_aCdeB16b64c2b, + dnnl_aCdeB16b64c4b, + dnnl_Bca16b, + dnnl_BcA16b2a, + dnnl_BcA16b4a, + dnnl_Bcda16b, + dnnl_BcdA16b2a, + dnnl_BcdA16b4a, + dnnl_Bcdea16b, + dnnl_BcdeA16b2a, + dnnl_BcdeA16b4a, + dnnl_aCdb16c, + dnnl_aCdB16c2b, + dnnl_aCdB16c4b, + dnnl_aCdeb16c, + dnnl_aCdeB16c2b, + dnnl_aCdeB16c4b, + dnnl_aCdefb16c, + dnnl_aCdefB16c2b, + dnnl_aCdefB16c4b, + dnnl_Bcda32b, + dnnl_BcdA32b2a, + dnnl_BcdA32b4a, + dnnl_Bcda48b, + dnnl_BcdA48b2a, + dnnl_BcdA48b4a, + dnnl_Bcda64b, + dnnl_BcdA64b2a, + dnnl_BcdA64b4a, + dnnl_aCdeb32c, + dnnl_aCdeB32c2b, + dnnl_aCdeB32c4b, + dnnl_aCdeb48c, + dnnl_aCdeB48c2b, + dnnl_aCdeB48c4b, + dnnl_aCdeb64c, + dnnl_aCdeB64c2b, + dnnl_aCdeB64c4b, + dnnl_Acb24a, + dnnl_Acdb24a, + dnnl_Acdeb24a, + dnnl_aBdc24b, + dnnl_aBdec24b, + dnnl_aBdefc24b, + dnnl_abDc16d, + dnnl_abdEc16e, + dnnl_abdCe16c, + dnnl_AcB24a2b, + dnnl_AcdB24a2b, + dnnl_AcdeB24a2b, + dnnl_aBdC24b2c, + dnnl_aBdeC24b2c, + dnnl_aBdefC24b2c, + dnnl_AcB8a2b, + dnnl_AcdB8a2b, + dnnl_AcdeB8a2b, + dnnl_aBdC8b2c, + dnnl_aBdeC8b2c, + dnnl_aBdefC8b2c, + dnnl_AB8b32a, + dnnl_ABc8b32a, + dnnl_ABcd8b32a, + dnnl_ABcde8b32a, + dnnl_AB8b24a, + dnnl_ABc8b24a, + dnnl_ABcd8b24a, + dnnl_ABcde8b24a, + dnnl_AB8b16a, + dnnl_ABc8b16a, + dnnl_ABcd8b16a, + dnnl_ABcde8b16a, + dnnl_AB8b8a, + dnnl_AB4b8a4b, + dnnl_AB4b24a4b, + dnnl_ABc4b8a4b, + dnnl_ABc4b24a4b, + dnnl_ABcd4b8a4b, + dnnl_ABcd4b24a4b, + dnnl_ABcde4b8a4b, + dnnl_ABcde4b24a4b, + dnnl_AB8b24a2b, + dnnl_ABc8b24a2b, + dnnl_ABcd8b24a2b, + dnnl_ABcde8b24a2b, + dnnl_AB8b8a2b, + dnnl_ABc8b8a2b, + dnnl_ABcd8b8a2b, + dnnl_ABcde8b8a2b, + dnnl_AcB24a4b, + dnnl_AcdB24a4b, + dnnl_AcdeB24a4b, + dnnl_aBdC24b4c, + dnnl_aBdeC24b4c, + dnnl_aBdefC24b4c, + dnnl_AcB8a4b, + dnnl_AcdB8a4b, + dnnl_AcdeB8a4b, + dnnl_aBdC8b4c, + dnnl_aBdeC8b4c, + dnnl_aBdefC8b4c, + dnnl_Bca8b, + dnnl_BcA8b2a, + dnnl_Bcda8b, + dnnl_BcdA8b2a, + dnnl_Bcdea8b, + dnnl_BcdeA8b2a, + dnnl_aCdb8c, + dnnl_aCdB8c2b, + dnnl_aCdeb8c, + dnnl_aCdeB8c2b, + dnnl_aCdefb8c, + dnnl_aCdefB8c2b, + dnnl_Bca24b, + dnnl_BcA24b2a, + dnnl_Bcda24b, + dnnl_BcdA24b2a, + dnnl_Bcdea24b, + dnnl_BcdeA24b2a, + dnnl_aCdb24c, + dnnl_aCdB24c2b, + dnnl_aCdeb24c, + dnnl_aCdeB24c2b, + dnnl_aCdefb24c, + dnnl_aCdefB24c2b, + dnnl_BcA8b4a, + dnnl_BcdA8b4a, + dnnl_BcdeA8b4a, + dnnl_aCdB8c4b, + dnnl_aCdeB8c4b, + dnnl_aCdefB8c4b, + dnnl_BcA24b4a, + dnnl_BcdA24b4a, + dnnl_BcdeA24b4a, + dnnl_aCdB24c4b, + dnnl_aCdeB24c4b, + dnnl_aCdefB24c4b, + dnnl_AB16b48a, + dnnl_ABc16b48a, + dnnl_ABcd16b48a, + dnnl_ABcde16b48a, + dnnl_ABc16a4b, + dnnl_ABcd16a4b, + dnnl_ABcde16a4b, + dnnl_defcbA16a, + dnnl_defcbA8a, + dnnl_AcB16b64a, + dnnl_AcdB16b64a, + dnnl_AcdeB16b64a, + dnnl_AcB16b48a, + dnnl_AcdB16b48a, + dnnl_AcdeB16b48a, + dnnl_AcB16b32a, + dnnl_AcdB16b32a, + dnnl_AcdeB16b32a, + dnnl_AcB16b16a, + dnnl_AcdB16b16a, + dnnl_AcdeB16b16a, + dnnl_AcB8b32a, + dnnl_AcdB8b32a, + dnnl_AcdeB8b32a, + dnnl_AcB8b24a, + dnnl_AcdB8b24a, + dnnl_AcdeB8b24a, + dnnl_AcB8b16a, + dnnl_AcdB8b16a, + dnnl_AcdeB8b16a, + dnnl_AcB8b8a, + dnnl_AcdB8b8a, + dnnl_AcdeB8b8a, + dnnl_AcB8b64a2b, + dnnl_AcdB8b64a2b, + dnnl_AcdeB8b64a2b, + dnnl_AcB8b32a2b, + dnnl_AcdB8b32a2b, + dnnl_AcdeB8b32a2b, + dnnl_AcB8b24a2b, + dnnl_AcdB8b24a2b, + dnnl_AcdeB8b24a2b, + dnnl_AcB8b16a2b, + dnnl_AcdB8b16a2b, + dnnl_AcdeB8b16a2b, + dnnl_AcB8b8a2b, + dnnl_AcdB8b8a2b, + dnnl_AcdeB8b8a2b, + dnnl_AcB4b64a4b, + dnnl_AcdB4b64a4b, + dnnl_AcdeB4b64a4b, + dnnl_AcB4b32a4b, + dnnl_AcdB4b32a4b, + dnnl_AcdeB4b32a4b, + dnnl_AcB4b24a4b, + dnnl_AcdB4b24a4b, + dnnl_AcdeB4b24a4b, + dnnl_AcB4b16a4b, + dnnl_AcdB4b16a4b, + 
dnnl_AcdeB4b16a4b, + dnnl_AcB4b8a4b, + dnnl_AcdB4b8a4b, + dnnl_AcdeB4b8a4b, + dnnl_Ab4a, + dnnl_Ab8a, + dnnl_BA4b4a, + dnnl_BA8b4a, + dnnl_BA2a24b, + dnnl_aCB2b24c, + dnnl_BA2a8b, + dnnl_aCB2b8c, + dnnl_BA8a24b, + dnnl_aCB8b24c, + dnnl_BA8a16b, + dnnl_aCB8b16c, + dnnl_BA8a8b, + dnnl_aCB8b8c, + dnnl_bcad, + dnnl_cabd, + dnnl_dabc, + dnnl_Ab32a, + dnnl_aCBd8b8c, + dnnl_aCBde8b8c, + dnnl_BAc8a8b, + dnnl_BAcd8a8b, + dnnl_BAcde8a8b, + dnnl_aCBdef8b8c, + dnnl_abdEC16e4c, + dnnl_abDC16d4c, + + /// Just a sentinel, not a real memory format tag. Must be changed after a new + /// format tag is added. + dnnl_format_tag_last, + + // Aliases + + /// 1D tensor, an alias to #dnnl_a + dnnl_x = dnnl_a, + /// 2D CNN activations tensor, an alias to #dnnl_ab + dnnl_nc = dnnl_ab, + /// 2D CNN activations tensor, an alias to #dnnl_ba + dnnl_cn = dnnl_ba, + /// 2D RNN statistics tensor, an alias to #dnnl_ab + dnnl_tn = dnnl_ab, + /// 2D RNN statistics tensor, an alias to #dnnl_ba + dnnl_nt = dnnl_ba, + /// 3D CNN activations tensor, an alias to #dnnl_abc + dnnl_ncw = dnnl_abc, + /// 3D CNN activations tensor, an alias to #dnnl_acb + dnnl_nwc = dnnl_acb, + /// 4D CNN activations tensor, an alias to #dnnl_abcd + dnnl_nchw = dnnl_abcd, + /// 4D CNN activations tensor, an alias to #dnnl_acdb + dnnl_nhwc = dnnl_acdb, + /// 4D CNN activations tensor, an alias to #dnnl_bcda + dnnl_chwn = dnnl_bcda, + /// 5D CNN activations tensor, an alias to #dnnl_abcde + dnnl_ncdhw = dnnl_abcde, + /// 5D CNN activations tensor, an alias to #dnnl_acdeb + dnnl_ndhwc = dnnl_acdeb, + + /// 2D CNN weights tensor, an alias to #dnnl_ab + dnnl_oi = dnnl_ab, + /// 2D CNN weights tensor, an alias to #dnnl_ba + dnnl_io = dnnl_ba, + /// 3D CNN weights tensor, an alias to #dnnl_abc + dnnl_oiw = dnnl_abc, + /// 3D CNN weights tensor, an alias to #dnnl_acb + dnnl_owi = dnnl_acb, + /// 3D CNN weights tensor, an alias to #dnnl_cba + dnnl_wio = dnnl_cba, + /// 3D CNN weights tensor, an alias to #dnnl_cab + dnnl_woi = dnnl_cab, + /// 3D CNN weights tensor, an alias to #dnnl_bca + dnnl_iwo = dnnl_bca, + /// 4D CNN weights tensor, an alias to #dnnl_abcd + dnnl_oihw = dnnl_abcd, + /// 4D CNN weights tensor, an alias to #dnnl_cdba + dnnl_hwio = dnnl_cdba, + /// 4D CNN weights tensor, an alias to #dnnl_cdab + dnnl_hwoi = dnnl_cdab, + /// 4D CNN weights tensor, an alias to #dnnl_acdb + dnnl_ohwi = dnnl_acdb, + /// 4D CNN weights tensor, an alias to #dnnl_bcda + dnnl_ihwo = dnnl_bcda, + /// 4D CNN weights tensor, an alias to #dnnl_bacd + dnnl_iohw = dnnl_bacd, + /// 5D CNN weights tensor, an alias to #dnnl_abcde + dnnl_oidhw = dnnl_abcde, + /// 5D CNN weights tensor, an alias to #dnnl_bacde + dnnl_iodhw = dnnl_bacde, + /// 5D CNN weights tensor, an alias to #dnnl_cdeba + dnnl_dhwio = dnnl_cdeba, + /// 5D CNN weights tensor, an alias to #dnnl_cdeab + dnnl_dhwoi = dnnl_cdeab, + /// 5D CNN weights tensor, an alias to #dnnl_acdeb + dnnl_odhwi = dnnl_acdeb, + /// 5D CNN weights tensor, an alias to #dnnl_bcdea + dnnl_idhwo = dnnl_bcdea, + + /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abcd + dnnl_goiw = dnnl_abcd, + /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abdc + dnnl_gowi = dnnl_abdc, + /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_dcab + dnnl_wigo = dnnl_dcab, + /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abcde + dnnl_goihw = dnnl_abcde, + /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abdec + dnnl_gohwi = dnnl_abdec, + /// 5D CNN weights tensor (incl.
groups), an alias to #dnnl_decab + dnnl_hwigo = dnnl_decab, + /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_acbde + dnnl_giohw = dnnl_acbde, + /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abcdef + dnnl_goidhw = dnnl_abcdef, + /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abdefc + dnnl_godhwi = dnnl_abdefc, + /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_acbdef + dnnl_giodhw = dnnl_acbdef, + /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_defcab + dnnl_dhwigo = dnnl_defcab, + + /// 3D RNN data tensor in the format (seq_length, batch, input channels), + /// an alias to #dnnl_abc. + dnnl_tnc = dnnl_abc, + /// 3D RNN data tensor in the format (batch, seq_length, input channels), + /// an alias to #dnnl_bac. + dnnl_ntc = dnnl_bac, + /// 4D RNN states tensor in the format (num_layers, num_directions, + /// batch, state channels), an alias to #dnnl_abcd. + dnnl_ldnc = dnnl_abcd, + /// 5D RNN weights tensor in the format (num_layers, num_directions, + /// input_channels, num_gates, output_channels), an alias to #dnnl_abcde. + /// + /// - For LSTM cells, the gates order is input, forget, candidate + /// and output gate. + /// - For GRU cells, the gates order is update, reset and output gate. + dnnl_ldigo = dnnl_abcde, + /// 5D RNN weights tensor in the format (num_layers, num_directions, + /// num_gates, output_channels, input_channels), an alias to #dnnl_abdec. + /// + /// - For LSTM cells, the gates order is input, forget, candidate + /// and output gate. + /// - For GRU cells, the gates order is update, reset and output gate. + dnnl_ldgoi = dnnl_abdec, + /// 4D LSTM projection tensor in the format (num_layers, num_directions, + /// num_channels_in_hidden_state, num_channels_in_recurrent_projection), + /// an alias to #dnnl_abcd. + dnnl_ldio = dnnl_abcd, + /// 4D LSTM projection tensor in the format (num_layers, num_directions, + /// num_channels_in_recurrent_projection, num_channels_in_hidden_state), + /// an alias to #dnnl_abdc. + dnnl_ldoi = dnnl_abdc, + /// 4D RNN bias tensor in the format (num_layers, num_directions, + /// num_gates, output_channels), an alias to #dnnl_abcd. + /// + /// - For LSTM cells, the gates order is input, forget, candidate + /// and output gate. + /// - For GRU cells, the gates order is update, reset and output gate. 
+ dnnl_ldgo = dnnl_abcd, + /// 5D LSTM projection tensor + dnnl_ldOi16o = dnnl_abDc16d, + dnnl_ldOi32o = dnnl_abDc32d, + dnnl_ldOI16o4i = dnnl_abDC16d4c, + dnnl_ldOI32o4i = dnnl_abDC32d4c, + dnnl_ldIo32i = dnnl_abCd32c, + /// 6D RNN weights tensor + dnnl_ldgOi16o = dnnl_abdEc16e, + dnnl_ldgOI16o4i = dnnl_abdEC16e4c, + dnnl_ldgOi32o = dnnl_abdEc32e, + dnnl_ldgOI32o2i = dnnl_abdEC32e2c, + dnnl_ldgOI32o4i = dnnl_abdEC32e4c, + dnnl_ldgOI64o2i = dnnl_abdEC64e2c, + dnnl_ldgOI64o4i = dnnl_abdEC64e4c, + dnnl_ldgIo16i = dnnl_abdCe16c, + dnnl_ldgIo32i = dnnl_abdCe32c, + dnnl_ldgIO32i2o = dnnl_abdCE32c2e, + + // Opaque data types; not to be used explicitly + + // data + /// 5D CNN activations tensor blocked by channels with block size 32, + /// an alias to #dnnl_aBcde32b + dnnl_nCdhw32c = dnnl_aBcde32b, + /// 5D CNN activations tensor blocked by channels with block size 16, + /// an alias to #dnnl_aBcde16b + dnnl_nCdhw16c = dnnl_aBcde16b, + /// 5D CNN activations tensor blocked by channels with block size 4, + /// an alias to #dnnl_aBcde4b + dnnl_nCdhw4c = dnnl_aBcde4b, + /// 5D CNN activations tensor blocked by channels with block size 8, + /// an alias to #dnnl_aBcde8b + dnnl_nCdhw8c = dnnl_aBcde8b, + /// 4D CNN activations tensor blocked by channels with block size 32, + /// an alias to #dnnl_aBcd32b + dnnl_nChw32c = dnnl_aBcd32b, + /// 4D CNN activations tensor blocked by channels with block size 16, + /// an alias to #dnnl_aBcd16b + dnnl_nChw16c = dnnl_aBcd16b, + /// 4D CNN activations tensor blocked by channels with block size 4, + /// an alias to #dnnl_aBcd4b + dnnl_nChw4c = dnnl_aBcd4b, + /// 4D CNN activations tensor blocked by channels with block size 8, + /// an alias to #dnnl_aBcd8b + dnnl_nChw8c = dnnl_aBcd8b, + /// 3D CNN activations tensor blocked by channels with block size 32, + /// an alias to #dnnl_aBc32b + dnnl_nCw32c = dnnl_aBc32b, + /// 3D CNN activations tensor blocked by channels with block size 16, + /// an alias to #dnnl_aBc16b + dnnl_nCw16c = dnnl_aBc16b, + /// 3D CNN activations tensor blocked by channels with block size 4, + /// an alias to #dnnl_aBc4b + dnnl_nCw4c = dnnl_aBc4b, + /// 3D CNN activations tensor blocked by channels with block size 8, + /// an alias to #dnnl_aBc8b + dnnl_nCw8c = dnnl_aBc8b, + dnnl_NCw16n16c = dnnl_ABc16a16b, + dnnl_NCdhw16n16c = dnnl_ABcde16a16b, + dnnl_NChw16n16c = dnnl_ABcd16a16b, + dnnl_NCw32n16c = dnnl_ABc32a16b, + dnnl_NChw32n16c = dnnl_ABcd32a16b, + dnnl_NChw16n32c = dnnl_ABcd16a32b, + dnnl_NCdhw32n16c = dnnl_ABcde32a16b, + dnnl_NCw32n32c = dnnl_ABc32a32b, + dnnl_NChw32n32c = dnnl_ABcd32a32b, + dnnl_NCdhw32n32c = dnnl_ABcde32a32b, + + // weights, 2D + dnnl_OI16i16o = dnnl_AB16b16a, + dnnl_OI16i32o = dnnl_AB16b32a, + dnnl_OI16i48o = dnnl_AB16b48a, + dnnl_OI16i64o = dnnl_AB16b64a, + dnnl_OI8i8o2i = dnnl_AB8b8a2b, + dnnl_OI8i16o2i = dnnl_AB8b16a2b, + dnnl_OI8i24o2i = dnnl_AB8b24a2b, + dnnl_OI8i32o2i = dnnl_AB8b32a2b, + dnnl_OI8i64o2i = dnnl_AB8b64a2b, + dnnl_OI4i8o4i = dnnl_AB4b8a4b, + dnnl_OI4i16o4i = dnnl_AB4b16a4b, + dnnl_OI4i24o4i = dnnl_AB4b24a4b, + dnnl_OI4i32o4i = dnnl_AB4b32a4b, + dnnl_OI4i64o4i = dnnl_AB4b64a4b, + dnnl_OI16i16o4i = dnnl_AB16b16a4b, + dnnl_OI8i32o = dnnl_AB8b32a, + dnnl_OI8i24o = dnnl_AB8b24a, + dnnl_OI8i16o = dnnl_AB8b16a, + dnnl_OI8i8o = dnnl_AB8b8a, + + // weights, 3D + dnnl_IOw8o8i = dnnl_BAc8a8b, + dnnl_IOw16o16i = dnnl_BAc16a16b, + dnnl_IOw16i16o = dnnl_BAc16b16a, + dnnl_OIw16i16o = dnnl_ABc16b16a, + dnnl_OwI16i16o = dnnl_AcB16b16a, + dnnl_OIw16i32o = dnnl_ABc16b32a, + dnnl_OwI16i32o = dnnl_AcB16b32a, +
dnnl_OIw16i48o = dnnl_ABc16b48a, + dnnl_OwI16i48o = dnnl_AcB16b48a, + dnnl_OIw16i64o = dnnl_ABc16b64a, + dnnl_OwI16i64o = dnnl_AcB16b64a, + dnnl_OIw16o16i = dnnl_ABc16a16b, + dnnl_Oiw16o = dnnl_Abc16a, + dnnl_OIw4i8o4i = dnnl_ABc4b8a4b, + dnnl_OwI4i8o4i = dnnl_AcB4b8a4b, + dnnl_OIw4i16o4i = dnnl_ABc4b16a4b, + dnnl_OwI4i16o4i = dnnl_AcB4b16a4b, + dnnl_OIw4i24o4i = dnnl_ABc4b24a4b, + dnnl_OwI4i24o4i = dnnl_AcB4b24a4b, + dnnl_OIw4i32o4i = dnnl_ABc4b32a4b, + dnnl_OwI4i32o4i = dnnl_AcB4b32a4b, + dnnl_OIw4i64o4i = dnnl_ABc4b64a4b, + dnnl_OwI4i64o4i = dnnl_AcB4b64a4b, + dnnl_OIw2i8o4i = dnnl_ABc2b8a4b, + dnnl_OIw16i16o4i = dnnl_ABc16b16a4b, + dnnl_OIw16i16o2i = dnnl_ABc16b16a2b, + dnnl_OIw16o16i2o = dnnl_ABc16a16b2a, + dnnl_OIw4i4o = dnnl_ABc4b4a, + dnnl_OIw4o4i = dnnl_ABc4a4b, + dnnl_Oiw4o = dnnl_Abc4a, + dnnl_OIw8i8o2i = dnnl_ABc8b8a2b, + dnnl_OwI8i8o2i = dnnl_AcB8b8a2b, + dnnl_OIw8i16o2i = dnnl_ABc8b16a2b, + dnnl_OwI8i16o2i = dnnl_AcB8b16a2b, + dnnl_OIw8i24o2i = dnnl_ABc8b24a2b, + dnnl_OwI8i24o2i = dnnl_AcB8b24a2b, + dnnl_OIw8i32o2i = dnnl_ABc8b32a2b, + dnnl_OwI8i32o2i = dnnl_AcB8b32a2b, + dnnl_OIw8i64o2i = dnnl_ABc8b64a2b, + dnnl_OwI8i64o2i = dnnl_AcB8b64a2b, + dnnl_OIw8i8o = dnnl_ABc8b8a, + dnnl_OwI8i8o = dnnl_AcB8b8a, + dnnl_OIw8o16i2o = dnnl_ABc8a16b2a, + dnnl_IOw8o16i2o = dnnl_BAc8a16b2a, + dnnl_OIw8o8i = dnnl_ABc8a8b, + dnnl_OIw8o4i = dnnl_ABc8a4b, + dnnl_Owi16o = dnnl_Acb16a, + dnnl_OwI16o2i = dnnl_AcB16a2b, + dnnl_OwI16o4i = dnnl_AcB16a4b, + dnnl_Iwo8i = dnnl_Bca8b, + dnnl_IwO8i2o = dnnl_BcA8b2a, + dnnl_IwO8i4o = dnnl_BcA8b4a, + dnnl_Iwo16i = dnnl_Bca16b, + dnnl_IwO16i2o = dnnl_BcA16b2a, + dnnl_IwO16i4o = dnnl_BcA16b4a, + dnnl_Iwo24i = dnnl_Bca24b, + dnnl_IwO24i2o = dnnl_BcA24b2a, + dnnl_IwO24i4o = dnnl_BcA24b4a, + dnnl_Owi4o = dnnl_Acb4a, + dnnl_Owi8o = dnnl_Acb8a, + dnnl_OwI8o2i = dnnl_AcB8a2b, + dnnl_OIw8i32o = dnnl_ABc8b32a, + dnnl_OwI8i32o = dnnl_AcB8b32a, + dnnl_OIw8i24o = dnnl_ABc8b24a, + dnnl_OwI8i24o = dnnl_AcB8b24a, + dnnl_OIw8i16o = dnnl_ABc8b16a, + dnnl_OwI8i16o = dnnl_AcB8b16a, + dnnl_OwI8o4i = dnnl_AcB8a4b, + + // weights, 4D + dnnl_IOhw16i16o = dnnl_BAcd16b16a, + dnnl_IOhw8o8i = dnnl_BAcd8a8b, + dnnl_IOhw16o16i = dnnl_BAcd16a16b, + dnnl_Ohwi16o = dnnl_Acdb16a, + dnnl_OhwI16o2i = dnnl_AcdB16a2b, + dnnl_OhwI16o4i = dnnl_AcdB16a4b, + dnnl_Ihwo8i = dnnl_Bcda8b, + dnnl_IhwO8i2o = dnnl_BcdA8b2a, + dnnl_IhwO8i4o = dnnl_BcdA8b4a, + dnnl_Ihwo16i = dnnl_Bcda16b, + dnnl_IhwO16i2o = dnnl_BcdA16b2a, + dnnl_IhwO16i4o = dnnl_BcdA16b4a, + dnnl_Ihwo24i = dnnl_Bcda24b, + dnnl_IhwO24i2o = dnnl_BcdA24b2a, + dnnl_IhwO24i4o = dnnl_BcdA24b4a, + dnnl_Ohwi24o = dnnl_Acdb24a, + dnnl_Ohwi32o = dnnl_Acdb32a, + dnnl_Ohwi4o = dnnl_Acdb4a, + dnnl_Ohwi8o = dnnl_Acdb8a, + dnnl_OhwI8o2i = dnnl_AcdB8a2b, + dnnl_OhwI8o4i = dnnl_AcdB8a4b, + dnnl_OIhw16i16o = dnnl_ABcd16b16a, + dnnl_OhwI16i16o = dnnl_AcdB16b16a, + dnnl_OIhw16i32o = dnnl_ABcd16b32a, + dnnl_OhwI16i32o = dnnl_AcdB16b32a, + dnnl_OIhw16i48o = dnnl_ABcd16b48a, + dnnl_OhwI16i48o = dnnl_AcdB16b48a, + dnnl_OIhw16i64o = dnnl_ABcd16b64a, + dnnl_OhwI16i64o = dnnl_AcdB16b64a, + dnnl_OIhw16o16i = dnnl_ABcd16a16b, + dnnl_Oihw16o = dnnl_Abcd16a, + dnnl_OIhw4i8o4i = dnnl_ABcd4b8a4b, + dnnl_OhwI4i8o4i = dnnl_AcdB4b8a4b, + dnnl_OIhw4i16o4i = dnnl_ABcd4b16a4b, + dnnl_OhwI4i16o4i = dnnl_AcdB4b16a4b, + dnnl_OIhw4i24o4i = dnnl_ABcd4b24a4b, + dnnl_OhwI4i24o4i = dnnl_AcdB4b24a4b, + dnnl_OIhw4i32o4i = dnnl_ABcd4b32a4b, + dnnl_OhwI4i32o4i = dnnl_AcdB4b32a4b, + dnnl_OIhw4i64o4i = dnnl_ABcd4b64a4b, + dnnl_OhwI4i64o4i = dnnl_AcdB4b64a4b, + dnnl_OIhw16i16o4i = 
dnnl_ABcd16b16a4b, + dnnl_OIhw16i16o2i = dnnl_ABcd16b16a2b, + dnnl_OIhw16o16i2o = dnnl_ABcd16a16b2a, + dnnl_OIhw4i4o = dnnl_ABcd4b4a, + dnnl_OIhw4o4i = dnnl_ABcd4a4b, + dnnl_Oihw4o = dnnl_Abcd4a, + dnnl_OIhw8i8o2i = dnnl_ABcd8b8a2b, + dnnl_OhwI8i8o2i = dnnl_AcdB8b8a2b, + dnnl_OIhw8i16o2i = dnnl_ABcd8b16a2b, + dnnl_OhwI8i16o2i = dnnl_AcdB8b16a2b, + dnnl_OIhw8i32o2i = dnnl_ABcd8b32a2b, + dnnl_OhwI8i32o2i = dnnl_AcdB8b32a2b, + dnnl_OIhw8i24o2i = dnnl_ABcd8b24a2b, + dnnl_OhwI8i24o2i = dnnl_AcdB8b24a2b, + dnnl_OIhw8i64o2i = dnnl_ABcd8b64a2b, + dnnl_OhwI8i64o2i = dnnl_AcdB8b64a2b, + dnnl_OIhw8i8o = dnnl_ABcd8b8a, + dnnl_OhwI8i8o = dnnl_AcdB8b8a, + dnnl_OIhw8o16i2o = dnnl_ABcd8a16b2a, + dnnl_OIhw2i8o4i = dnnl_ABcd2b8a4b, + dnnl_IOhw8o16i2o = dnnl_BAcd8a16b2a, + dnnl_OIhw8o8i = dnnl_ABcd8a8b, + dnnl_OIhw8o4i = dnnl_ABcd8a4b, + dnnl_Owhi16o = dnnl_Adcb16a, + dnnl_OIhw8i32o = dnnl_ABcd8b32a, + dnnl_OhwI8i32o = dnnl_AcdB8b32a, + dnnl_OIhw8i24o = dnnl_ABcd8b24a, + dnnl_OhwI8i24o = dnnl_AcdB8b24a, + dnnl_OIhw8i16o = dnnl_ABcd8b16a, + dnnl_OhwI8i16o = dnnl_AcdB8b16a, + + // weights, 5D + dnnl_Odhwi16o = dnnl_Acdeb16a, + dnnl_OdhwI16o2i = dnnl_AcdeB16a2b, + dnnl_OdhwI16o4i = dnnl_AcdeB16a4b, + dnnl_Idhwo8i = dnnl_Bcdea8b, + dnnl_IdhwO8i2o = dnnl_BcdeA8b2a, + dnnl_IdhwO8i4o = dnnl_BcdeA8b4a, + dnnl_Idhwo16i = dnnl_Bcdea16b, + dnnl_IdhwO16i2o = dnnl_BcdeA16b2a, + dnnl_IdhwO16i4o = dnnl_BcdeA16b4a, + dnnl_Idhwo24i = dnnl_Bcdea24b, + dnnl_IdhwO24i2o = dnnl_BcdeA24b2a, + dnnl_IdhwO24i4o = dnnl_BcdeA24b4a, + dnnl_Odhwi4o = dnnl_Acdeb4a, + dnnl_Odhwi8o = dnnl_Acdeb8a, + dnnl_OdhwI8o2i = dnnl_AcdeB8a2b, + dnnl_OdhwI8o4i = dnnl_AcdeB8a4b, + dnnl_Odwhi16o = dnnl_Acedb16a, + dnnl_OIdhw16i16o = dnnl_ABcde16b16a, + dnnl_OdhwI16i16o = dnnl_AcdeB16b16a, + dnnl_OIdhw16i32o = dnnl_ABcde16b32a, + dnnl_OdhwI16i32o = dnnl_AcdeB16b32a, + dnnl_OIdhw16i48o = dnnl_ABcde16b48a, + dnnl_OdhwI16i48o = dnnl_AcdeB16b48a, + dnnl_OIdhw16i64o = dnnl_ABcde16b64a, + dnnl_OdhwI16i64o = dnnl_AcdeB16b64a, + dnnl_OIdhw16o16i = dnnl_ABcde16a16b, + dnnl_Oidhw16o = dnnl_Abcde16a, + dnnl_OIdhw4i4o = dnnl_ABcde4b4a, + dnnl_OIdhw4o4i = dnnl_ABcde4a4b, + dnnl_Oidhw4o = dnnl_Abcde4a, + dnnl_OIdhw8i8o2i = dnnl_ABcde8b8a2b, + dnnl_OdhwI8i8o2i = dnnl_AcdeB8b8a2b, + dnnl_OIdhw8i16o2i = dnnl_ABcde8b16a2b, + dnnl_OdhwI8i16o2i = dnnl_AcdeB8b16a2b, + dnnl_OIdhw8i32o2i = dnnl_ABcde8b32a2b, + dnnl_OdhwI8i32o2i = dnnl_AcdeB8b32a2b, + dnnl_OIdhw8i24o2i = dnnl_ABcde8b24a2b, + dnnl_OdhwI8i24o2i = dnnl_AcdeB8b24a2b, + dnnl_OIdhw8i64o2i = dnnl_ABcde8b64a2b, + dnnl_OdhwI8i64o2i = dnnl_AcdeB8b64a2b, + dnnl_OIdhw8i8o = dnnl_ABcde8b8a, + dnnl_OdhwI8i8o = dnnl_AcdeB8b8a, + dnnl_OIdhw8o16i2o = dnnl_ABcde8a16b2a, + dnnl_IOdhw8o16i2o = dnnl_BAcde8a16b2a, + dnnl_OIdhw4i8o4i = dnnl_ABcde4b8a4b, + dnnl_OdhwI4i8o4i = dnnl_AcdeB4b8a4b, + dnnl_OIdhw4i16o4i = dnnl_ABcde4b16a4b, + dnnl_OdhwI4i16o4i = dnnl_AcdeB4b16a4b, + dnnl_OIdhw4i24o4i = dnnl_ABcde4b24a4b, + dnnl_OdhwI4i24o4i = dnnl_AcdeB4b24a4b, + dnnl_OIdhw4i32o4i = dnnl_ABcde4b32a4b, + dnnl_OdhwI4i32o4i = dnnl_AcdeB4b32a4b, + dnnl_OIdhw4i64o4i = dnnl_ABcde4b64a4b, + dnnl_OdhwI4i64o4i = dnnl_AcdeB4b64a4b, + dnnl_OIdhw16i16o4i = dnnl_ABcde16b16a4b, + dnnl_OIdhw16i16o2i = dnnl_ABcde16b16a2b, + dnnl_OIdhw2i8o4i = dnnl_ABcde2b8a4b, + dnnl_OIdhw8o8i = dnnl_ABcde8a8b, + dnnl_OIdhw8o4i = dnnl_ABcde8a4b, + dnnl_IOdhw16i16o = dnnl_BAcde16b16a, + dnnl_OIdhw4o8i8o4i = dnnl_ABcde4a8b8a4b, + dnnl_IOdhw8o8i = dnnl_BAcde8a8b, + dnnl_IOdhw16o16i = dnnl_BAcde16a16b, + dnnl_OIdhw16o16i2o = dnnl_ABcde16a16b2a, + dnnl_OIdhw8i32o = 
dnnl_ABcde8b32a, + dnnl_OdhwI8i32o = dnnl_AcdeB8b32a, + dnnl_OIdhw8i24o = dnnl_ABcde8b24a, + dnnl_OdhwI8i24o = dnnl_AcdeB8b24a, + dnnl_OIdhw8i16o = dnnl_ABcde8b16a, + dnnl_OdhwI8i16o = dnnl_AcdeB8b16a, + + // weights w/ groups, 3D + dnnl_Goiw16g = dnnl_Abcd16a, + dnnl_Goiw8g = dnnl_Abcd8a, + dnnl_Goiw4g = dnnl_Abcd4a, + dnnl_gIOw8o8i = dnnl_aCBd8b8c, + dnnl_gIOw16o16i = dnnl_aCBd16b16c, + dnnl_gIOw16i16o = dnnl_aCBd16c16b, + dnnl_gOIw16i16o = dnnl_aBCd16c16b, + dnnl_gOIw16o16i = dnnl_aBCd16b16c, + dnnl_gOiw16o = dnnl_aBcd16b, + dnnl_gOIw4i16o4i = dnnl_aBCd4c16b4c, + dnnl_gOIw2i8o4i = dnnl_aBCd2c8b4c, + dnnl_gOIw16i16o4i = dnnl_aBCd16c16b4c, + dnnl_gOIw16i16o2i = dnnl_aBCd16c16b2c, + dnnl_gOIw16o16i2o = dnnl_aBCd16b16c2b, + dnnl_gOIw4i4o = dnnl_aBCd4c4b, + dnnl_gOIw4o4i = dnnl_aBCd4b4c, + dnnl_gOiw4o = dnnl_aBcd4b, + dnnl_gOIw8i16o2i = dnnl_aBCd8c16b2c, + dnnl_gOIw8i8o = dnnl_aBCd8c8b, + dnnl_gOIw8o16i2o = dnnl_aBCd8b16c2b, + dnnl_gIOw8o16i2o = dnnl_aCBd8b16c2b, + dnnl_gOIw8o8i = dnnl_aBCd8b8c, + dnnl_gOIw8o4i = dnnl_aBCd8b4c, + dnnl_gOwi16o = dnnl_aBdc16b, + dnnl_gOwI16o2i = dnnl_aBdC16b2c, + dnnl_gOwI16o4i = dnnl_aBdC16b4c, + dnnl_gIwo8i = dnnl_aCdb8c, + dnnl_gIwO8i2o = dnnl_aCdB8c2b, + dnnl_gIwO8i4o = dnnl_aCdB8c4b, + dnnl_gIwo16i = dnnl_aCdb16c, + dnnl_gIwO16i2o = dnnl_aCdB16c2b, + dnnl_gIwO16i4o = dnnl_aCdB16c4b, + dnnl_gIwo24i = dnnl_aCdb24c, + dnnl_gIwO24i2o = dnnl_aCdB24c2b, + dnnl_gIwO24i4o = dnnl_aCdB24c4b, + dnnl_gOwi4o = dnnl_aBdc4b, + dnnl_gOwi8o = dnnl_aBdc8b, + dnnl_gOwI8o2i = dnnl_aBdC8b2c, + dnnl_gOwI8o4i = dnnl_aBdC8b4c, + dnnl_Goiw32g = dnnl_Abcd32a, + dnnl_gOIw2i4o2i = dnnl_aBCd2c4b2c, + dnnl_gOIw2o4i2o = dnnl_aBCd2b4c2b, + dnnl_gOIw4i8o2i = dnnl_aBCd4c8b2c, + dnnl_gOIw4o8i2o = dnnl_aBCd4b8c2b, + dnnl_goIw4i = dnnl_abCd4c, + dnnl_goIw32i = dnnl_abCd32c, + + // weights w/ groups, 4D + dnnl_gIOhw16i16o = dnnl_aCBde16c16b, + dnnl_gIOhw8o8i = dnnl_aCBde8b8c, + dnnl_gIOhw16o16i = dnnl_aCBde16b16c, + dnnl_gOhwi16o = dnnl_aBdec16b, + dnnl_gOhwI16o2i = dnnl_aBdeC16b2c, + dnnl_gOhwI16o4i = dnnl_aBdeC16b4c, + dnnl_gIhwo8i = dnnl_aCdeb8c, + dnnl_gIhwO8i2o = dnnl_aCdeB8c2b, + dnnl_gIhwO8i4o = dnnl_aCdeB8c4b, + dnnl_gIhwo16i = dnnl_aCdeb16c, + dnnl_gIhwO16i2o = dnnl_aCdeB16c2b, + dnnl_gIhwO16i4o = dnnl_aCdeB16c4b, + dnnl_gIhwo24i = dnnl_aCdeb24c, + dnnl_gIhwO24i2o = dnnl_aCdeB24c2b, + dnnl_gIhwO24i4o = dnnl_aCdeB24c4b, + dnnl_gOhwi32o = dnnl_aBdec32b, + dnnl_gOhwi24o = dnnl_aBdec24b, + dnnl_gOhwI24o2i = dnnl_aBdeC24b2c, + dnnl_gOhwI24o4i = dnnl_aBdeC24b4c, + dnnl_gOhwi4o = dnnl_aBdec4b, + dnnl_gOhwi8o = dnnl_aBdec8b, + dnnl_gOhwI8o2i = dnnl_aBdeC8b2c, + dnnl_gOhwI8o4i = dnnl_aBdeC8b4c, + dnnl_Goihw16g = dnnl_Abcde16a, + dnnl_gOIhw16i16o = dnnl_aBCde16c16b, + dnnl_gOIhw16o16i = dnnl_aBCde16b16c, + dnnl_gOihw16o = dnnl_aBcde16b, + dnnl_gOIhw2i8o4i = dnnl_aBCde2c8b4c, + dnnl_gOIhw4i16o4i = dnnl_aBCde4c16b4c, + dnnl_gOIhw16i16o4i = dnnl_aBCde16c16b4c, + dnnl_gOIhw16i16o2i = dnnl_aBCde16c16b2c, + dnnl_gOIhw16o16i2o = dnnl_aBCde16b16c2b, + dnnl_gOIhw4i4o = dnnl_aBCde4c4b, + dnnl_gOIhw4o4i = dnnl_aBCde4b4c, + dnnl_gOihw4o = dnnl_aBcde4b, + dnnl_Goihw8g = dnnl_Abcde8a, + dnnl_Goihw4g = dnnl_Abcde4a, + dnnl_gOIhw8i16o2i = dnnl_aBCde8c16b2c, + dnnl_gOIhw8i8o = dnnl_aBCde8c8b, + dnnl_gOIhw8o16i2o = dnnl_aBCde8b16c2b, + dnnl_gIOhw8o16i2o = dnnl_aCBde8b16c2b, + dnnl_gOIhw8o8i = dnnl_aBCde8b8c, + dnnl_gOIhw8o4i = dnnl_aBCde8b4c, + dnnl_Goihw32g = dnnl_Abcde32a, + dnnl_gOwhi16o = dnnl_aBedc16b, + dnnl_goIhw4i = dnnl_abCde4c, + dnnl_goIhw32i = dnnl_abCde32c, + + dnnl_OIw4o8i8o4i = dnnl_ABc4a8b8a4b, 
+ dnnl_OIhw4o8i8o4i = dnnl_ABcd4a8b8a4b, + dnnl_IOw4i8o8i4o = dnnl_BAc4b8a8b4a, + dnnl_IOhw4i8o8i4o = dnnl_BAcd4b8a8b4a, + dnnl_IOdhw4i8o8i4o = dnnl_BAcde4b8a8b4a, + + dnnl_OIhw2o8i8o2i = dnnl_ABcd2a8b8a2b, + dnnl_gOIw4o8i8o4i = dnnl_aBCd4b8c8b4c, + dnnl_gOIhw4o8i8o4i = dnnl_aBCde4b8c8b4c, + dnnl_gOIdhw4o8i8o4i = dnnl_aBCdef4b8c8b4c, + dnnl_gIOw4i8o8i4o = dnnl_aCBd4c8b8c4b, + dnnl_gIOhw4i8o8i4o = dnnl_aCBde4c8b8c4b, + dnnl_gIOdhw4i8o8i4o = dnnl_aCBdef4c8b8c4b, + dnnl_gOIhw2o8i8o2i = dnnl_aBCde2b8c8b2c, + dnnl_gOIhw2i4o2i = dnnl_aBCde2c4b2c, + dnnl_gOIhw2o4i2o = dnnl_aBCde2b4c2b, + dnnl_gOIhw4i8o2i = dnnl_aBCde4c8b2c, + dnnl_gOIhw4o8i2o = dnnl_aBCde4b8c2b, + + // weights w/ groups, 6D + dnnl_gIOdhw16i16o = dnnl_aCBdef16c16b, + dnnl_gIOdhw8o8i = dnnl_aCBdef8b8c, + dnnl_gIOdhw16o16i = dnnl_aCBdef16b16c, + dnnl_gOdhwi16o = dnnl_aBdefc16b, + dnnl_gOdhwI16o2i = dnnl_aBdefC16b2c, + dnnl_gOdhwI16o4i = dnnl_aBdefC16b4c, + dnnl_gIdhwo8i = dnnl_aCdefb8c, + dnnl_gIdhwO8i2o = dnnl_aCdefB8c2b, + dnnl_gIdhwO8i4o = dnnl_aCdefB8c4b, + dnnl_gIdhwo16i = dnnl_aCdefb16c, + dnnl_gIdhwO16i2o = dnnl_aCdefB16c2b, + dnnl_gIdhwO16i4o = dnnl_aCdefB16c4b, + dnnl_gIdhwo24i = dnnl_aCdefb24c, + dnnl_gIdhwO24i2o = dnnl_aCdefB24c2b, + dnnl_gIdhwO24i4o = dnnl_aCdefB24c4b, + dnnl_gOdhwi4o = dnnl_aBdefc4b, + dnnl_gOdhwi8o = dnnl_aBdefc8b, + dnnl_gOdhwI8o2i = dnnl_aBdefC8b2c, + dnnl_gOdhwI8o4i = dnnl_aBdefC8b4c, + dnnl_gOdwhi16o = dnnl_aBdfec16b, + dnnl_gOIdhw16i16o = dnnl_aBCdef16c16b, + dnnl_gOIdhw4i16o4i = dnnl_aBCdef4c16b4c, + dnnl_gOIdhw16i16o4i = dnnl_aBCdef16c16b4c, + dnnl_gOIdhw2i8o4i = dnnl_aBCdef2c8b4c, + dnnl_gOIdhw16i16o2i = dnnl_aBCdef16c16b2c, + dnnl_gOIdhw16o16i = dnnl_aBCdef16b16c, + dnnl_gOIdhw16o16i2o = dnnl_aBCdef16b16c2b, + dnnl_gOidhw16o = dnnl_aBcdef16b, + dnnl_gOIdhw4i4o = dnnl_aBCdef4c4b, + dnnl_gOIdhw4o4i = dnnl_aBCdef4b4c, + dnnl_gOidhw4o = dnnl_aBcdef4b, + dnnl_gOIdhw8i16o2i = dnnl_aBCdef8c16b2c, + dnnl_gOIdhw8i8o = dnnl_aBCdef8c8b, + dnnl_gOIdhw8o16i2o = dnnl_aBCdef8b16c2b, + dnnl_gIOdhw8o16i2o = dnnl_aCBdef8b16c2b, + dnnl_gOIdhw8o8i = dnnl_aBCdef8b8c, + dnnl_gOIdhw8o4i = dnnl_aBCdef8b4c, + dnnl_Goidhw16g = dnnl_Abcdef16a, + dnnl_Goidhw32g = dnnl_Abcdef32a, + dnnl_gOIdhw2i4o2i = dnnl_aBCdef2c4b2c, + dnnl_gOIdhw4i8o2i = dnnl_aBCdef4c8b2c, + dnnl_gOIdhw2o4i2o = dnnl_aBCdef2b4c2b, + dnnl_gOIdhw4o8i2o = dnnl_aBCdef4b8c2b, + dnnl_goIdhw4i = dnnl_abCdef4c, + dnnl_goIdhw32i = dnnl_abCdef32c, + + // weights, 3D + dnnl_Owi24o = dnnl_Acb24a, + dnnl_OwI24o2i = dnnl_AcB24a2b, + dnnl_OwI24o4i = dnnl_AcB24a4b, + dnnl_Owi32o = dnnl_Acb32a, + dnnl_OwI32o2i = dnnl_AcB32a2b, + dnnl_OwI32o4i = dnnl_AcB32a4b, + dnnl_Owi48o = dnnl_Acb48a, + dnnl_OwI48o2i = dnnl_AcB48a2b, + dnnl_OwI48o4i = dnnl_AcB48a4b, + dnnl_Owi64o = dnnl_Acb64a, + dnnl_OwI64o2i = dnnl_AcB64a2b, + dnnl_OwI64o4i = dnnl_AcB64a4b, + dnnl_Iwo32i = dnnl_Bca32b, + dnnl_IwO32i2o = dnnl_BcA32b2a, + dnnl_IwO32i4o = dnnl_BcA32b4a, + dnnl_Iwo48i = dnnl_Bca48b, + dnnl_IwO48i2o = dnnl_BcA48b2a, + dnnl_IwO48i4o = dnnl_BcA48b4a, + dnnl_Iwo64i = dnnl_Bca64b, + dnnl_IwO64i2o = dnnl_BcA64b2a, + dnnl_IwO64i4o = dnnl_BcA64b4a, + dnnl_wIo2i = dnnl_cBa2b, + dnnl_wIo4i = dnnl_cBa4b, + dnnl_gOwi24o = dnnl_aBdc24b, + dnnl_gOwI24o2i = dnnl_aBdC24b2c, + dnnl_gOwI24o4i = dnnl_aBdC24b4c, + dnnl_gOwi32o = dnnl_aBdc32b, + dnnl_gOwI32o2i = dnnl_aBdC32b2c, + dnnl_gOwI32o4i = dnnl_aBdC32b4c, + dnnl_gOwi48o = dnnl_aBdc48b, + dnnl_gOwI48o2i = dnnl_aBdC48b2c, + dnnl_gOwI48o4i = dnnl_aBdC48b4c, + dnnl_gOwi64o = dnnl_aBdc64b, + dnnl_gOwI64o2i = dnnl_aBdC64b2c, + dnnl_gOwI64o4i = 
dnnl_aBdC64b4c, + dnnl_gIwo32i = dnnl_aCdb32c, + dnnl_gIwO32i2o = dnnl_aCdB32c2b, + dnnl_gIwO32i4o = dnnl_aCdB32c4b, + dnnl_gIwo48i = dnnl_aCdb48c, + dnnl_gIwO48i2o = dnnl_aCdB48c2b, + dnnl_gIwO48i4o = dnnl_aCdB48c4b, + dnnl_gIwo64i = dnnl_aCdb64c, + dnnl_gIwO64i2o = dnnl_aCdB64c2b, + dnnl_gIwO64i4o = dnnl_aCdB64c4b, + dnnl_gwio = dnnl_adcb, + dnnl_gwIo2i = dnnl_adCb2c, + dnnl_gwIo4i = dnnl_adCb4c, + // weights, 4D + dnnl_OhwI24o = dnnl_Acdb24a, + dnnl_OhwI24o2i = dnnl_AcdB24a2b, + dnnl_OhwI24o4i = dnnl_AcdB24a4b, + dnnl_OhwI32o = dnnl_Acdb32a, + dnnl_OhwI32o2i = dnnl_AcdB32a2b, + dnnl_OhwI32o4i = dnnl_AcdB32a4b, + dnnl_Ohwi48o = dnnl_Acdb48a, + dnnl_OhwI48o2i = dnnl_AcdB48a2b, + dnnl_OhwI48o4i = dnnl_AcdB48a4b, + dnnl_Ohwi64o = dnnl_Acdb64a, + dnnl_OhwI64o2i = dnnl_AcdB64a2b, + dnnl_OhwI64o4i = dnnl_AcdB64a4b, + dnnl_Ihwo32i = dnnl_Bcda32b, + dnnl_IhwO32i2o = dnnl_BcdA32b2a, + dnnl_IhwO32i4o = dnnl_BcdA32b4a, + dnnl_Ihwo48i = dnnl_Bcda48b, + dnnl_IhwO48i2o = dnnl_BcdA48b2a, + dnnl_IhwO48i4o = dnnl_BcdA48b4a, + dnnl_Ihwo64i = dnnl_Bcda64b, + dnnl_IhwO64i2o = dnnl_BcdA64b2a, + dnnl_IhwO64i4o = dnnl_BcdA64b4a, + dnnl_hwIo2i = dnnl_cdBa2b, + dnnl_hwIo4i = dnnl_cdBa4b, + dnnl_gOhwI24o = dnnl_aBdec24b, + dnnl_gOhwI32o = dnnl_aBdec32b, + dnnl_gOhwI32o2i = dnnl_aBdeC32b2c, + dnnl_gOhwI32o4i = dnnl_aBdeC32b4c, + dnnl_gOhwi48o = dnnl_aBdec48b, + dnnl_gOhwI48o2i = dnnl_aBdeC48b2c, + dnnl_gOhwI48o4i = dnnl_aBdeC48b4c, + dnnl_gOhwi64o = dnnl_aBdec64b, + dnnl_gOhwI64o2i = dnnl_aBdeC64b2c, + dnnl_gOhwI64o4i = dnnl_aBdeC64b4c, + dnnl_gIhwo32i = dnnl_aCdeb32c, + dnnl_gIhwO32i2o = dnnl_aCdeB32c2b, + dnnl_gIhwO32i4o = dnnl_aCdeB32c4b, + dnnl_gIhwo48i = dnnl_aCdeb48c, + dnnl_gIhwO48i2o = dnnl_aCdeB48c2b, + dnnl_gIhwO48i4o = dnnl_aCdeB48c4b, + dnnl_gIhwo64i = dnnl_aCdeb64c, + dnnl_gIhwO64i2o = dnnl_aCdeB64c2b, + dnnl_gIhwO64i4o = dnnl_aCdeB64c4b, + dnnl_ghwio = dnnl_adecb, + dnnl_ghwIo2i = dnnl_adeCb2c, + dnnl_ghwIo4i = dnnl_adeCb4c, + // weights, 5D + dnnl_Odhwi24o = dnnl_Acdeb24a, + dnnl_OdhwI24o2i = dnnl_AcdeB24a2b, + dnnl_OdhwI24o4i = dnnl_AcdeB24a4b, + dnnl_Odhwi32o = dnnl_Acdeb32a, + dnnl_OdhwI32o2i = dnnl_AcdeB32a2b, + dnnl_OdhwI32o4i = dnnl_AcdeB32a4b, + dnnl_Odhwi48o = dnnl_Acdeb48a, + dnnl_OdhwI48o2i = dnnl_AcdeB48a2b, + dnnl_OdhwI48o4i = dnnl_AcdeB48a4b, + dnnl_Odhwi64o = dnnl_Acdeb64a, + dnnl_OdhwI64o2i = dnnl_AcdeB64a2b, + dnnl_OdhwI64o4i = dnnl_AcdeB64a4b, + dnnl_Idhwo32i = dnnl_Bcdea32b, + dnnl_IdhwO32i2o = dnnl_BcdeA32b2a, + dnnl_IdhwO32i4o = dnnl_BcdeA32b4a, + dnnl_Idhwo48i = dnnl_Bcdea48b, + dnnl_IdhwO48i2o = dnnl_BcdeA48b2a, + dnnl_IdhwO48i4o = dnnl_BcdeA48b4a, + dnnl_Idhwo64i = dnnl_Bcdea64b, + dnnl_IdhwO64i2o = dnnl_BcdeA64b2a, + dnnl_IdhwO64i4o = dnnl_BcdeA64b4a, + dnnl_dhwIo2i = dnnl_cdeBa2b, + dnnl_dhwIo4i = dnnl_cdeBa4b, + dnnl_gOdhwi24o = dnnl_aBdefc24b, + dnnl_gOdhwI24o2i = dnnl_aBdefC24b2c, + dnnl_gOdhwI24o4i = dnnl_aBdefC24b4c, + dnnl_gOdhwi32o = dnnl_aBdefc32b, + dnnl_gOdhwI32o2i = dnnl_aBdefC32b2c, + dnnl_gOdhwI32o4i = dnnl_aBdefC32b4c, + dnnl_gOdhwi48o = dnnl_aBdefc48b, + dnnl_gOdhwI48o2i = dnnl_aBdefC48b2c, + dnnl_gOdhwI48o4i = dnnl_aBdefC48b4c, + dnnl_gOdhwi64o = dnnl_aBdefc64b, + dnnl_gOdhwI64o2i = dnnl_aBdefC64b2c, + dnnl_gOdhwI64o4i = dnnl_aBdefC64b4c, + dnnl_gIdhwo32i = dnnl_aCdefb32c, + dnnl_gIdhwO32i2o = dnnl_aCdefB32c2b, + dnnl_gIdhwO32i4o = dnnl_aCdefB32c4b, + dnnl_gIdhwo48i = dnnl_aCdefb48c, + dnnl_gIdhwO48i2o = dnnl_aCdefB48c2b, + dnnl_gIdhwO48i4o = dnnl_aCdefB48c4b, + dnnl_gIdhwo64i = dnnl_aCdefb64c, + dnnl_gIdhwO64i2o = dnnl_aCdefB64c2b, + dnnl_gIdhwO64i4o = 
dnnl_aCdefB64c4b, + dnnl_gdhwio = dnnl_adefcb, + dnnl_gdhwIo2i = dnnl_adefCb2c, + dnnl_gdhwIo4i = dnnl_adefCb4c, + dnnl_OI16i32o4i = dnnl_AB16b32a4b, + dnnl_OI16i48o4i = dnnl_AB16b48a4b, + dnnl_OI16i64o4i = dnnl_AB16b64a4b, + dnnl_OI16i16o2i = dnnl_AB16b16a2b, + dnnl_OI16i32o2i = dnnl_AB16b32a2b, + dnnl_OI16i48o2i = dnnl_AB16b48a2b, + dnnl_OI16i64o2i = dnnl_AB16b64a2b, + dnnl_OIw16i32o4i = dnnl_ABc16b32a4b, + dnnl_OIw16i48o4i = dnnl_ABc16b48a4b, + dnnl_OIw16i64o4i = dnnl_ABc16b64a4b, + dnnl_OIw16i32o2i = dnnl_ABc16b32a2b, + dnnl_OIw16i48o2i = dnnl_ABc16b48a2b, + dnnl_OIw16i64o2i = dnnl_ABc16b64a2b, + dnnl_OIhw16i32o4i = dnnl_ABcd16b32a4b, + dnnl_OIhw16i48o4i = dnnl_ABcd16b48a4b, + dnnl_OIhw16i64o4i = dnnl_ABcd16b64a4b, + dnnl_OIhw16i32o2i = dnnl_ABcd16b32a2b, + dnnl_OIhw16i48o2i = dnnl_ABcd16b48a2b, + dnnl_OIhw16i64o2i = dnnl_ABcd16b64a2b, + dnnl_OIdhw16i32o4i = dnnl_ABcde16b32a4b, + dnnl_OIdhw16i48o4i = dnnl_ABcde16b48a4b, + dnnl_OIdhw16i64o4i = dnnl_ABcde16b64a4b, + dnnl_OIdhw16i32o2i = dnnl_ABcde16b32a2b, + dnnl_OIdhw16i48o2i = dnnl_ABcde16b48a2b, + dnnl_OIdhw16i64o2i = dnnl_ABcde16b64a2b, + dnnl_OwI16i16o2i = dnnl_AcB16b16a2b, + dnnl_OwI16i16o4i = dnnl_AcB16b16a4b, + dnnl_OhwI16i16o2i = dnnl_AcdB16b16a2b, + dnnl_OhwI16i16o4i = dnnl_AcdB16b16a4b, + dnnl_OdhwI16i16o2i = dnnl_AcdeB16b16a2b, + dnnl_OdhwI16i16o4i = dnnl_AcdeB16b16a4b, + dnnl_IwO16o16i2o = dnnl_BcA16a16b2a, + dnnl_IwO16o16i4o = dnnl_BcA16a16b4a, + dnnl_IhwO16o16i2o = dnnl_BcdA16a16b2a, + dnnl_IhwO16o16i4o = dnnl_BcdA16a16b4a, + dnnl_IdhwO16o16i2o = dnnl_BcdeA16a16b2a, + dnnl_IdhwO16o16i4o = dnnl_BcdeA16a16b4a, + dnnl_gOwI16i16o2i = dnnl_aBdC16c16b2c, + dnnl_gOwI16i16o4i = dnnl_aBdC16c16b4c, + dnnl_gOhwI16i16o2i = dnnl_aBdeC16c16b2c, + dnnl_gOhwI16i16o4i = dnnl_aBdeC16c16b4c, + dnnl_gOdhwI16i16o2i = dnnl_aBdefC16c16b2c, + dnnl_gOdhwI16i16o4i = dnnl_aBdefC16c16b4c, + dnnl_gIwO16o16i2o = dnnl_aCdB16b16c2b, + dnnl_gIwO16o16i4o = dnnl_aCdB16b16c4b, + dnnl_gIhwO16o16i2o = dnnl_aCdeB16b16c2b, + dnnl_gIhwO16o16i4o = dnnl_aCdeB16b16c4b, + dnnl_gIdhwO16o16i2o = dnnl_aCdefB16b16c2b, + dnnl_gIdhwO16o16i4o = dnnl_aCdefB16b16c4b, + dnnl_OwI16i32o2i = dnnl_AcB16b32a2b, + dnnl_OwI16i32o4i = dnnl_AcB16b32a4b, + dnnl_OwI16i48o2i = dnnl_AcB16b48a2b, + dnnl_OwI16i48o4i = dnnl_AcB16b48a4b, + dnnl_OwI16i64o2i = dnnl_AcB16b64a2b, + dnnl_OwI16i64o4i = dnnl_AcB16b64a4b, + dnnl_IwO16o32i2o = dnnl_BcA16a32b2a, + dnnl_IwO16o32i4o = dnnl_BcA16a32b4a, + dnnl_IwO16o48i2o = dnnl_BcA16a48b2a, + dnnl_IwO16o48i4o = dnnl_BcA16a48b4a, + dnnl_IwO16o64i2o = dnnl_BcA16a64b2a, + dnnl_IwO16o64i4o = dnnl_BcA16a64b4a, + dnnl_gOwI16i32o2i = dnnl_aBdC16c32b2c, + dnnl_gOwI16i32o4i = dnnl_aBdC16c32b4c, + dnnl_gOwI16i48o2i = dnnl_aBdC16c48b2c, + dnnl_gOwI16i48o4i = dnnl_aBdC16c48b4c, + dnnl_gOwI16i64o2i = dnnl_aBdC16c64b2c, + dnnl_gOwI16i64o4i = dnnl_aBdC16c64b4c, + dnnl_gIwO16o32i2o = dnnl_aCdB16b32c2b, + dnnl_gIwO16o32i4o = dnnl_aCdB16b32c4b, + dnnl_gIwO16o48i2o = dnnl_aCdB16b48c2b, + dnnl_gIwO16o48i4o = dnnl_aCdB16b48c4b, + dnnl_gIwO16o64i2o = dnnl_aCdB16b64c2b, + dnnl_gIwO16o64i4o = dnnl_aCdB16b64c4b, + dnnl_OhwI16i32o2i = dnnl_AcdB16b32a2b, + dnnl_OhwI16i32o4i = dnnl_AcdB16b32a4b, + dnnl_OhwI16i48o2i = dnnl_AcdB16b48a2b, + dnnl_OhwI16i48o4i = dnnl_AcdB16b48a4b, + dnnl_OhwI16i64o2i = dnnl_AcdB16b64a2b, + dnnl_OhwI16i64o4i = dnnl_AcdB16b64a4b, + dnnl_IhwO16o32i2o = dnnl_BcdA16a32b2a, + dnnl_IhwO16o32i4o = dnnl_BcdA16a32b4a, + dnnl_IhwO16o48i2o = dnnl_BcdA16a48b2a, + dnnl_IhwO16o48i4o = dnnl_BcdA16a48b4a, + dnnl_IhwO16o64i2o = dnnl_BcdA16a64b2a, + dnnl_IhwO16o64i4o = 
dnnl_BcdA16a64b4a, + dnnl_gOhwI16i32o2i = dnnl_aBdeC16c32b2c, + dnnl_gOhwI16i32o4i = dnnl_aBdeC16c32b4c, + dnnl_gOhwI16i48o2i = dnnl_aBdeC16c48b2c, + dnnl_gOhwI16i48o4i = dnnl_aBdeC16c48b4c, + dnnl_gOhwI16i64o2i = dnnl_aBdeC16c64b2c, + dnnl_gOhwI16i64o4i = dnnl_aBdeC16c64b4c, + dnnl_gIhwO16o32i2o = dnnl_aCdeB16b32c2b, + dnnl_gIhwO16o32i4o = dnnl_aCdeB16b32c4b, + dnnl_gIhwO16o48i2o = dnnl_aCdeB16b48c2b, + dnnl_gIhwO16o48i4o = dnnl_aCdeB16b48c4b, + dnnl_gIhwO16o64i2o = dnnl_aCdeB16b64c2b, + dnnl_gIhwO16o64i4o = dnnl_aCdeB16b64c4b, + dnnl_OdhwI16i32o2i = dnnl_AcdeB16b32a2b, + dnnl_OdhwI16i32o4i = dnnl_AcdeB16b32a4b, + dnnl_OdhwI16i48o2i = dnnl_AcdeB16b48a2b, + dnnl_OdhwI16i48o4i = dnnl_AcdeB16b48a4b, + dnnl_OdhwI16i64o2i = dnnl_AcdeB16b64a2b, + dnnl_OdhwI16i64o4i = dnnl_AcdeB16b64a4b, + dnnl_IdhwO16o32i2o = dnnl_BcdeA16a32b2a, + dnnl_IdhwO16o32i4o = dnnl_BcdeA16a32b4a, + dnnl_IdhwO16o48i2o = dnnl_BcdeA16a48b2a, + dnnl_IdhwO16o48i4o = dnnl_BcdeA16a48b4a, + dnnl_IdhwO16o64i2o = dnnl_BcdeA16a64b2a, + dnnl_IdhwO16o64i4o = dnnl_BcdeA16a64b4a, + dnnl_gOdhwI16i32o2i = dnnl_aBdefC16c32b2c, + dnnl_gOdhwI16i32o4i = dnnl_aBdefC16c32b4c, + dnnl_gOdhwI16i48o2i = dnnl_aBdefC16c48b2c, + dnnl_gOdhwI16i48o4i = dnnl_aBdefC16c48b4c, + dnnl_gOdhwI16i64o2i = dnnl_aBdefC16c64b2c, + dnnl_gOdhwI16i64o4i = dnnl_aBdefC16c64b4c, + dnnl_gIdhwO16o32i2o = dnnl_aCdefB16b32c2b, + dnnl_gIdhwO16o32i4o = dnnl_aCdefB16b32c4b, + dnnl_gIdhwO16o48i2o = dnnl_aCdefB16b48c2b, + dnnl_gIdhwO16o48i4o = dnnl_aCdefB16b48c4b, + dnnl_gIdhwO16o64i2o = dnnl_aCdefB16b64c2b, + dnnl_gIdhwO16o64i4o = dnnl_aCdefB16b64c4b, + dnnl_hwioG16g = dnnl_decbA16a, + dnnl_hwioG8g = dnnl_decbA8a, + dnnl_dhwioG16g = dnnl_defcbA16a, + dnnl_dhwioG8g = dnnl_defcbA8a, + dnnl_NCdhw40n16c = dnnl_ABcde40a16b, + dnnl_NCw40n16c = dnnl_ABc40a16b, + dnnl_NChw40n16c = dnnl_ABcd40a16b, + dnnl_NCw40n32c = dnnl_ABc40a32b, + dnnl_NChw40n32c = dnnl_ABcd40a32b, + dnnl_NCdhw40n32c = dnnl_ABcde40a32b, + dnnl_OIdhw4o8i8o2i = dnnl_ABcde4a8b8a2b, + dnnl_OIhw4o8i8o2i = dnnl_ABcd4a8b8a2b, + dnnl_OIw4o8i8o2i = dnnl_ABc4a8b8a2b, + dnnl_gOIdhw4o8i8o2i = dnnl_aBCdef4b8c8b2c, + dnnl_gOIhw4o8i8o2i = dnnl_aBCde4b8c8b2c, + dnnl_gOIw4o8i8o2i = dnnl_aBCd4b8c8b2c, + dnnl_IOdhw4i8o8i2o = dnnl_BAcde4b8a8b2a, + dnnl_IOhw4i8o8i2o = dnnl_BAcd4b8a8b2a, + dnnl_IOw4i8o8i2o = dnnl_BAc4b8a8b2a, + dnnl_gIOdhw4i8o8i2o = dnnl_aCBdef4c8b8c2b, + dnnl_gIOhw4i8o8i2o = dnnl_aCBde4c8b8c2b, + dnnl_gIOw4i8o8i2o = dnnl_aCBd4c8b8c2b, + dnnl_NCw2c32n8c = dnnl_ABc2b32a8b, + dnnl_NChw2c32n8c = dnnl_ABcd2b32a8b, + dnnl_NCdhw2c32n8c = dnnl_ABcde2b32a8b, + dnnl_OIw2i8o16i4o = dnnl_ABc2b8a16b4a, + dnnl_OIhw2i8o16i4o = dnnl_ABcd2b8a16b4a, + dnnl_OIdhw2i8o16i4o = dnnl_ABcde2b8a16b4a, + dnnl_OIw2o8i16o4i = dnnl_ABc2a8b16a4b, + dnnl_OIw2o8i16o2i = dnnl_ABc2a8b16a2b, + dnnl_IOw2i8o16i4o = dnnl_BAc2b8a16b4a, + dnnl_IOw2i8o16i2o = dnnl_BAc2b8a16b2a, + dnnl_OIhw2o8i16o4i = dnnl_ABcd2a8b16a4b, + dnnl_OIhw2o8i16o2i = dnnl_ABcd2a8b16a2b, + dnnl_IOhw2i8o16i4o = dnnl_BAcd2b8a16b4a, + dnnl_IOhw2i8o16i2o = dnnl_BAcd2b8a16b2a, + dnnl_OIdhw2o8i16o4i = dnnl_ABcde2a8b16a4b, + dnnl_OIdhw2o8i16o2i = dnnl_ABcde2a8b16a2b, + dnnl_IOdhw2i8o16i4o = dnnl_BAcde2b8a16b4a, + dnnl_IOdhw2i8o16i2o = dnnl_BAcde2b8a16b2a, + dnnl_gOIw2o8i16o2i = dnnl_aBCd2b8c16b2c, + dnnl_gIOw2i8o16i2o = dnnl_aCBd2c8b16c2b, + dnnl_gIOhw2i8o16i2o = dnnl_aBCde2c8b16c2b, + dnnl_gIOdhw2i8o16i2o = dnnl_aBCdef2c8b16c2b, + dnnl_gOIhw2o8i16o2i = dnnl_aBCde2b8c16b2c, + dnnl_gOIdhw2o8i16o2i = dnnl_aBCdef2b8c16b2c, + dnnl_gOIw2o8i16o4i = dnnl_aBCd2b8c16b4c, + dnnl_gOIhw2o8i16o4i = dnnl_aBCde2b8c16b4c, +} dnnl_format_tag_t;
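The enum above closes with hundreds of blocked layouts, but in user code a format tag is normally just passed to a memory-descriptor constructor. A minimal sketch (an editorial illustration, not part of the header; it assumes the oneDNN 3.x C API) showing that an alias such as dnnl_nhwc and the plain letter tag it maps to, dnnl_acdb, describe the same physical layout:

```c
#include <assert.h>
#include "oneapi/dnnl/dnnl.h"

void format_tag_alias_demo(void) {
    dnnl_dims_t dims = {2, 3, 227, 227}; // N, C, H, W
    dnnl_memory_desc_t md_nhwc, md_acdb;
    // Create two f32 descriptors for the same logical tensor, once via the
    // CNN-style alias and once via the generic letter tag it expands to.
    dnnl_memory_desc_create_with_tag(&md_nhwc, 4, dims, dnnl_f32, dnnl_nhwc);
    dnnl_memory_desc_create_with_tag(&md_acdb, 4, dims, dnnl_f32, dnnl_acdb);
    assert(dnnl_memory_desc_equal(md_nhwc, md_acdb)); // identical layouts
    dnnl_memory_desc_destroy(md_nhwc);
    dnnl_memory_desc_destroy(md_acdb);
}
```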
+ +/// @} dnnl_api_memory + +/// @addtogroup dnnl_api_primitives +/// @{ +/// @addtogroup dnnl_api_primitives_common +/// @{ + +/// Kinds of propagation. +typedef enum { + // TODO: suggest renames + /// Undefined propagation type. + dnnl_prop_kind_undef = 0, + /// Forward data propagation (training mode). In this mode primitives + /// perform computations necessary for subsequent backward propagation. + dnnl_forward_training = 64, + /// Forward data propagation (inference mode). In this mode primitives + /// perform only computations that are necessary for inference and omit + /// computations that are necessary only for backward propagation. + dnnl_forward_inference = 96, + /// Forward data propagation (alias for @c dnnl_forward_training). + dnnl_forward = dnnl_forward_training, + /// Backward propagation (with respect to all parameters). + dnnl_backward = 128, + /// Backward data propagation. + dnnl_backward_data = 160, + /// Backward weights propagation. + dnnl_backward_weights = 192, + /// Backward bias propagation. + dnnl_backward_bias = 193, +} dnnl_prop_kind_t; + +/// Kinds of primitives. Used to implement a way to extend the library with new +/// primitives without changing the ABI. +typedef enum { + /// Undefined primitive + dnnl_undefined_primitive, + /// A reorder primitive. + dnnl_reorder, + /// A shuffle primitive. + dnnl_shuffle, + /// An (out-of-place) concat primitive. + dnnl_concat, + /// A sum primitive. + dnnl_sum, + /// A convolution primitive. + dnnl_convolution, + /// A deconvolution primitive. + dnnl_deconvolution, + /// An element-wise primitive. + dnnl_eltwise, + /// An LRN primitive. + dnnl_lrn, + /// A batch normalization primitive. + dnnl_batch_normalization, + /// An inner product primitive. + dnnl_inner_product, + /// An RNN primitive. + dnnl_rnn, + /// A matrix multiplication primitive (internal). + dnnl_gemm, + /// A binary primitive. + dnnl_binary, + /// A matrix multiplication primitive. + dnnl_matmul, + /// A resampling primitive. + dnnl_resampling, + /// A pooling primitive. + dnnl_pooling, + /// A reduction primitive. + dnnl_reduction, + /// A PReLU primitive. + dnnl_prelu, + /// A softmax primitive. + dnnl_softmax, + /// A layer normalization primitive. + dnnl_layer_normalization, + /// A group normalization primitive. + dnnl_group_normalization, + + /// Parameter to allow internal-only primitives without undefined behavior. + /// This parameter is chosen to be valid as long as sizeof(int) >= 2. + dnnl_primitive_kind_max = 0x7fff, +} dnnl_primitive_kind_t;
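The propagation kinds above determine which extra outputs a primitive produces. A small hedged sketch (a hypothetical helper, not part of the header) of the usual selection logic:

```c
#include "oneapi/dnnl/dnnl_types.h"

/* Hypothetical helper: dnnl_forward is just an alias for
 * dnnl_forward_training, so the real choice is training vs. inference. */
static dnnl_prop_kind_t pick_prop_kind(int will_run_backward) {
    // Training mode keeps extra outputs (e.g. workspace) that the matching
    // backward primitives need; inference mode omits them.
    return will_run_backward ? dnnl_forward_training : dnnl_forward_inference;
}
```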
+ +/// Kinds of algorithms. +typedef enum { + dnnl_alg_kind_undef, + /// Direct convolution + dnnl_convolution_direct = 0x1, + /// Winograd convolution + dnnl_convolution_winograd = 0x2, + /// Convolution algorithm (either direct or Winograd) is chosen just in time + dnnl_convolution_auto = 0x3, + /// Direct deconvolution + dnnl_deconvolution_direct = 0xa, + /// Winograd deconvolution + dnnl_deconvolution_winograd = 0xb, + /// Eltwise: ReLU + dnnl_eltwise_relu = 0x20, + /// Eltwise: hyperbolic tangent non-linearity (tanh) + dnnl_eltwise_tanh, + /// Eltwise: exponential linear unit (elu) + dnnl_eltwise_elu, + /// Eltwise: square + dnnl_eltwise_square, + /// Eltwise: abs + dnnl_eltwise_abs, + /// Eltwise: square root + dnnl_eltwise_sqrt, + /// Eltwise: linear + dnnl_eltwise_linear, + /// Eltwise: soft_relu + dnnl_eltwise_soft_relu, + /// Eltwise: hardsigmoid + dnnl_eltwise_hardsigmoid, + /// Eltwise: logistic + dnnl_eltwise_logistic, + /// Eltwise: exponent + dnnl_eltwise_exp, + /// Eltwise: gelu + /// + /// @note Tanh approximation formula is used to approximate + /// the cumulative distribution function of a Gaussian here + dnnl_eltwise_gelu_tanh, + /// Eltwise: swish + dnnl_eltwise_swish, + /// Eltwise: natural logarithm + dnnl_eltwise_log, + /// Eltwise: clip + dnnl_eltwise_clip, + /// Eltwise: clip version 2 + dnnl_eltwise_clip_v2, + /// Eltwise: pow + dnnl_eltwise_pow, + /// Eltwise: erf-based gelu + dnnl_eltwise_gelu_erf, + /// Eltwise: round + dnnl_eltwise_round, + /// Eltwise: mish + dnnl_eltwise_mish, + /// Eltwise: hardswish + dnnl_eltwise_hardswish, + /// Eltwise: ReLU (dst for backward) + dnnl_eltwise_relu_use_dst_for_bwd = 0x100, + /// Eltwise: hyperbolic tangent non-linearity (tanh) (dst for backward) + dnnl_eltwise_tanh_use_dst_for_bwd, + /// Eltwise: exponential linear unit (elu) (dst for backward) + dnnl_eltwise_elu_use_dst_for_bwd, + /// Eltwise: square root (dst for backward) + dnnl_eltwise_sqrt_use_dst_for_bwd, + /// Eltwise: logistic (dst for backward) + dnnl_eltwise_logistic_use_dst_for_bwd, + /// Eltwise: exp (dst for backward) + dnnl_eltwise_exp_use_dst_for_bwd, + /// Eltwise: clip version 2 (dst for backward) + dnnl_eltwise_clip_v2_use_dst_for_bwd, + /// Max pooling + dnnl_pooling_max = 0x1ff, + /// Average pooling include padding + dnnl_pooling_avg_include_padding = 0x2ff, + /// Average pooling exclude padding + dnnl_pooling_avg_exclude_padding = 0x3ff, + /// Local response normalization (LRN) across multiple channels + dnnl_lrn_across_channels = 0xaff, + /// LRN within a single channel + dnnl_lrn_within_channel = 0xbff, + /// RNN cell + dnnl_vanilla_rnn = 0x1fff, + /// LSTM cell + dnnl_vanilla_lstm = 0x2fff, + /// GRU cell + dnnl_vanilla_gru = 0x3fff, + /// GRU cell with linear before reset + /// + /// Modification of the original GRU cell.
Differs from #dnnl_vanilla_gru + /// in how the new memory gate is calculated: + /// \f[ c_t = tanh(W_c*x_t + b_{c_x} + r_t*(U_c*h_{t-1}+b_{c_h})) \f] + /// The primitive expects 4 biases on input: + /// \f$[b_{u}, b_{r}, b_{c_x}, b_{c_h}]\f$ + dnnl_lbr_gru = 0x4fff, + /// AUGRU cell + dnnl_vanilla_augru = 0x5fff, + /// AUGRU cell with linear before reset + dnnl_lbr_augru = 0x6fff, + /// Binary add + dnnl_binary_add = 0x1fff0, + /// Binary mul + dnnl_binary_mul = 0x1fff1, + /// Binary max + dnnl_binary_max = 0x1fff2, + /// Binary min + dnnl_binary_min = 0x1fff3, + /// Binary div + dnnl_binary_div = 0x1fff4, + /// Binary sub + dnnl_binary_sub = 0x1fff5, + /// Binary greater or equal + dnnl_binary_ge = 0x1fff6, + /// Binary greater than + dnnl_binary_gt = 0x1fff7, + /// Binary less or equal + dnnl_binary_le = 0x1fff8, + /// Binary less than + dnnl_binary_lt = 0x1fff9, + /// Binary equal + dnnl_binary_eq = 0x1fffa, + /// Binary not equal + dnnl_binary_ne = 0x1fffb, + /// Binary select + dnnl_binary_select = 0x1fffc, + /// Nearest Neighbor Resampling Method + dnnl_resampling_nearest = 0x2fff0, + /// Linear Resampling Method + dnnl_resampling_linear = 0x2fff1, + /// Reduction using max + dnnl_reduction_max, + /// Reduction using min + dnnl_reduction_min, + /// Reduction using sum + dnnl_reduction_sum, + /// Reduction using mul + dnnl_reduction_mul, + /// Reduction using mean + dnnl_reduction_mean, + /// Reduction using lp norm + dnnl_reduction_norm_lp_max, + /// Reduction using lp norm + dnnl_reduction_norm_lp_sum, + /// Reduction using lp norm without final pth-root + dnnl_reduction_norm_lp_power_p_max, + /// Reduction using lp norm without final pth-root + dnnl_reduction_norm_lp_power_p_sum, + /// Softmax + dnnl_softmax_accurate = 0x30000, + /// Logsoftmax + dnnl_softmax_log, +} dnnl_alg_kind_t;
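To make the algorithm kinds concrete, here is a short sketch (editorial; it assumes the oneDNN 3.x C API signature of dnnl_eltwise_forward_primitive_desc_create, plus a valid engine and memory descriptor) that instantiates a ReLU eltwise primitive descriptor:

```c
#include "oneapi/dnnl/dnnl.h"

dnnl_status_t make_relu_pd(dnnl_engine_t engine, const_dnnl_memory_desc_t md,
        dnnl_primitive_desc_t *pd) {
    // For dnnl_eltwise_relu, alpha is the negative slope (0.f gives plain
    // ReLU) and beta is unused; NULL requests default attributes.
    return dnnl_eltwise_forward_primitive_desc_create(pd, engine,
            dnnl_forward_inference, dnnl_eltwise_relu, md, md, 0.f, 0.f, NULL);
}
```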
+ +/// Flags for normalization primitives. +typedef enum { + /// Use no normalization flags + /// + /// If specified + /// - on forward training propagation mean and variance are computed and + /// stored as output + /// - on backward propagation compute full derivative wrt data + /// - on backward propagation prop_kind == #dnnl_backward_data has the same + /// behavior as prop_kind == #dnnl_backward + dnnl_normalization_flags_none = 0x0U, + + /// Use global statistics + /// + /// If specified + /// - on forward propagation use mean and variance provided by user (input) + /// - on backward propagation reduces the amount of computations, since + /// mean and variance are considered as constants + /// + /// If not specified: + /// - on forward propagation mean and variance are computed and stored as + /// output + /// - on backward propagation compute full derivative wrt data + dnnl_use_global_stats = 0x1U, + + /// Use scale parameter + /// + /// If specified: + /// - on forward propagation use scale for the normalization results + /// - on backward propagation (for prop_kind == #dnnl_backward) compute + /// diff wrt scale (hence one extra output used) + dnnl_use_scale = 0x2U, + + /// Use shift parameter + /// + /// If specified: + /// - on forward propagation use shift (aka bias) for the normalization + /// results + /// - on backward propagation (for prop_kind == #dnnl_backward) compute + /// diff wrt shift (hence one extra output used) + dnnl_use_shift = 0x4U, + + /// Fuse with ReLU + /// + /// The flag implies negative slope being 0. On training this is the only + /// configuration supported. For inference, to use non-zero negative slope + /// consider using @ref dev_guide_attributes_post_ops. + /// + /// If specified: + /// - on inference this option behaves the same as if the primitive were + /// fused with ReLU using post ops API with zero negative slope. + /// - on training primitive requires workspace (required to be able to + /// perform backward pass) + dnnl_fuse_norm_relu = 0x8U, + + /// Fuse with Add and then fuse with ReLU + /// + /// If specified: + /// + /// - on forward propagation apply an element-wise binary Add operation + /// to the normalization results with an additional input tensor and then + /// apply ReLU with negative slope being 0. + /// - on training primitive requires workspace (required to be able to + /// perform backward pass). + /// - on backward propagation save the result of backward ReLU operation + /// with input tensor and workspace from forward pass to extra output + /// tensor and then perform backward normalization. + dnnl_fuse_norm_add_relu = 0x10U, + +} dnnl_normalization_flags_t; + +/// @} dnnl_api_primitives_common +/// @} dnnl_api_primitives + +/// @addtogroup dnnl_api_memory +/// @{ + +/// A wildcard value for dimensions that are unknown at primitive creation +/// time. +#define DNNL_RUNTIME_DIM_VAL INT64_MIN + +/// A `size_t` counterpart of #DNNL_RUNTIME_DIM_VAL. +/// For instance, this value is returned by dnnl_memory_desc_get_size() if +/// any of the dimensions or strides equals #DNNL_RUNTIME_DIM_VAL. +#define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL) + +/// @cond DO_NOT_DOCUMENT_THIS +/// Hex representation for a **special** quiet NAN (!= NAN from math.h) +static const union { + unsigned u; + float f; +} DNNL_RUNTIME_F32_VAL_REP = {0x7fc000d0}; +/// @endcond + +/// A wildcard value for floating point values that are unknown at primitive +/// creation time. +#define DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f) + +/// @cond DO_NOT_DOCUMENT_THIS +static const int DNNL_RUNTIME_S32_VAL_REP = INT32_MIN; +/// @endcond + +/// A wildcard value for int32_t values that are unknown at primitive creation +/// time. +#define DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP + +/// @struct dnnl_memory_desc +/// An opaque structure to describe a memory descriptor. +struct dnnl_memory_desc; + +/// A memory descriptor handle. +typedef struct dnnl_memory_desc *dnnl_memory_desc_t; + +/// A constant memory descriptor handle. +typedef const struct dnnl_memory_desc *const_dnnl_memory_desc_t; + +/// @struct dnnl_memory +/// An opaque structure to describe a memory. +struct dnnl_memory; + +/// A memory handle. +typedef struct dnnl_memory *dnnl_memory_t; + +/// A constant memory handle. +typedef const struct dnnl_memory *const_dnnl_memory_t; + +/// @} dnnl_api_memory
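The wildcard values defined above are easiest to see in code. A minimal sketch (editorial, assuming the oneDNN 3.x C API): a 2D descriptor whose leading dimension is fixed only at execution time, which is exactly the case in which dnnl_memory_desc_get_size() reports #DNNL_RUNTIME_SIZE_VAL:

```c
#include "oneapi/dnnl/dnnl.h"

void runtime_dim_demo(void) {
    // The row count is unknown until execution; the column count is fixed.
    dnnl_dims_t dims = {DNNL_RUNTIME_DIM_VAL, 256};
    dnnl_memory_desc_t md;
    dnnl_memory_desc_create_with_tag(&md, 2, dims, dnnl_f32, dnnl_ab);
    // With a wildcard dimension the byte size is a wildcard as well.
    size_t sz = dnnl_memory_desc_get_size(md); // == DNNL_RUNTIME_SIZE_VAL
    (void)sz;
    dnnl_memory_desc_destroy(md);
}
```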
+ +/// @addtogroup dnnl_api_primitives +/// @{ + +/// @addtogroup dnnl_api_rnn +/// @{ + +/// Flags for RNN cell. +typedef enum { + /// Undefined RNN flags + dnnl_rnn_flags_undef = 0x0, + /// Do not add weights gradient to existing diff_weights memory + dnnl_rnn_flags_diff_weights_overwrite = 0x1, +} dnnl_rnn_flags_t; + +/// A direction of RNN primitive execution. +typedef enum { + /// Undefined RNN direction. + dnnl_rnn_direction_undef = 0, + /// Unidirectional execution of RNN primitive from left to right. + dnnl_unidirectional_left2right, + /// Unidirectional execution of RNN primitive from right to left. + dnnl_unidirectional_right2left, + /// Bidirectional execution of RNN primitive with concatenation of the + /// results. + dnnl_bidirectional_concat, + /// Bidirectional execution of RNN primitive with summation of the + /// results. + dnnl_bidirectional_sum, +} dnnl_rnn_direction_t; + +/// @} dnnl_api_rnn + +/// @} dnnl_api_primitives + +/// @addtogroup dnnl_api_primitives +/// @{ +/// @addtogroup dnnl_api_primitives_common +/// @{ + +/// @struct dnnl_primitive_desc +/// @brief An opaque structure to describe a primitive descriptor. +struct dnnl_primitive_desc; + +/// @brief A primitive descriptor handle. +typedef struct dnnl_primitive_desc *dnnl_primitive_desc_t; + +/// @brief A constant primitive descriptor handle. +typedef const struct dnnl_primitive_desc *const_dnnl_primitive_desc_t; + +/// @} dnnl_api_primitives_common + +/// @addtogroup dnnl_api_attributes +/// @{ + +/// Scratchpad mode +typedef enum { + /// The library manages the scratchpad allocation according to the policy + /// specified by the `DNNL_ENABLE_CONCURRENT_EXEC` + /// [build option](@ref dev_guide_build_options) (default). + /// + /// When `DNNL_ENABLE_CONCURRENT_EXEC=OFF` (default), the library + /// scratchpad is common to all primitives to reduce the memory footprint. + /// This configuration comes with limited thread-safety properties, namely + /// primitives can be created and executed in parallel but cannot migrate + /// between threads (in other words, each primitive should be executed in + /// the same thread it was created in). + /// + /// When `DNNL_ENABLE_CONCURRENT_EXEC=ON`, the library scratchpad is + /// private to each primitive. The memory footprint is larger than when + /// using `DNNL_ENABLE_CONCURRENT_EXEC=OFF` but different primitives can be + /// created and run concurrently (the same primitive cannot be run + /// concurrently from two different threads though). + dnnl_scratchpad_mode_library, + /// The user manages the scratchpad allocation by querying and providing + /// the scratchpad memory to primitives. This mode is thread-safe as long + /// as the scratchpad buffers are not used concurrently by two primitive + /// executions. + dnnl_scratchpad_mode_user, +} dnnl_scratchpad_mode_t; + +/// Rounding mode +typedef enum { + /// Rounding mode dictated by the floating-point environment + dnnl_rounding_mode_environment, + /// Stochastic rounding mode where a random bias is added to the + /// trailing mantissa bits before conversion. + dnnl_rounding_mode_stochastic, +} dnnl_rounding_mode_t; + +/// @struct dnnl_primitive_attr +/// @brief An opaque structure for primitive descriptor attributes. +/// +/// Attributes may contain: +/// - output scales (to scale the result prior to storing it to the memory) +struct dnnl_primitive_attr; + +/// @brief A primitive descriptor attributes handle that controls primitive +/// behavior. +typedef struct dnnl_primitive_attr *dnnl_primitive_attr_t; + +/// @brief A constant primitive descriptor attributes handle. +typedef const struct dnnl_primitive_attr *const_dnnl_primitive_attr_t; + +/// @struct dnnl_post_ops +/// @brief An opaque structure for a chain of post operations. +/// +/// dnnl_post_ops can be used to perform some (trivial) operations like +/// accumulation or eltwise after certain primitives like convolution. +/// +/// Post operations may be combined together, making a chain of post +/// operations. For instance, one can configure convolution followed by +/// accumulation followed by eltwise. This might be especially beneficial +/// for residual learning blocks. +/// +/// @warning +/// Not all combinations are supported, so the user should handle +/// errors accordingly. +/// +/// Supported post operations: +/// - accumulation (base primitive: convolution) +/// - eltwise (base primitive: convolution) +struct dnnl_post_ops; + +/// @brief A post operation chain handle. +typedef struct dnnl_post_ops *dnnl_post_ops_t; + +/// @brief A constant post operation chain handle. +typedef const struct dnnl_post_ops *const_dnnl_post_ops_t; + +/// @} dnnl_api_attributes
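A sketch of the attribute/post-ops flow just described (editorial; it assumes the oneDNN 3.x C API, where the chain is copied into the attributes by dnnl_primitive_attr_set_post_ops): sum accumulation followed by a ReLU eltwise, the classic residual-block fusion:

```c
#include "oneapi/dnnl/dnnl.h"

void make_attr_with_post_ops(dnnl_primitive_attr_t *attr) {
    dnnl_post_ops_t ops;
    dnnl_post_ops_create(&ops);
    // Accumulate into the existing destination: dst = 1.0 * dst_old + result.
    // dnnl_data_type_undef keeps the destination's own data type.
    dnnl_post_ops_append_sum(ops, 1.0f, 0, dnnl_data_type_undef);
    // Then apply ReLU (alpha = negative slope = 0.f) in the same pass.
    dnnl_post_ops_append_eltwise(ops, dnnl_eltwise_relu, 0.f, 0.f);
    dnnl_primitive_attr_create(attr);
    dnnl_primitive_attr_set_post_ops(*attr, ops);
    dnnl_post_ops_destroy(ops); // the attributes hold their own copy
}
```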
+ +/// @addtogroup dnnl_api_primitives_common +/// @{ + +/// @struct dnnl_primitive +/// An opaque structure to describe a primitive. +struct dnnl_primitive; +/// A primitive handle. +typedef struct dnnl_primitive *dnnl_primitive_t; +/// A constant primitive handle. +typedef const struct dnnl_primitive *const_dnnl_primitive_t; + +/// Undefined argument. +#define DNNL_ARG_UNDEF 0 +/// Source argument #0. +#define DNNL_ARG_SRC_0 1 +/// A special mnemonic for source argument for primitives that have a +/// single source. An alias for #DNNL_ARG_SRC_0. +#define DNNL_ARG_SRC DNNL_ARG_SRC_0 +/// A special mnemonic for RNN input vector. An alias for +/// #DNNL_ARG_SRC_0. +#define DNNL_ARG_SRC_LAYER DNNL_ARG_SRC_0 +/// A special mnemonic for reorder source argument. An alias for +/// #DNNL_ARG_SRC_0. +#define DNNL_ARG_FROM DNNL_ARG_SRC_0 + +/// Source argument #1. +#define DNNL_ARG_SRC_1 2 +/// A special mnemonic for RNN input recurrent hidden state vector. An alias +/// for #DNNL_ARG_SRC_1. +#define DNNL_ARG_SRC_ITER DNNL_ARG_SRC_1 + +/// Source argument #2. +#define DNNL_ARG_SRC_2 3 +/// A special mnemonic for RNN input recurrent cell state vector. An alias for +/// #DNNL_ARG_SRC_2. +#define DNNL_ARG_SRC_ITER_C DNNL_ARG_SRC_2 + +/// Source argument #3. +#define DNNL_ARG_SRC_3 4 +/// A special mnemonic for RNN input recurrent cell attention vector. An alias for +/// #DNNL_ARG_SRC_3. +#define DNNL_ARG_AUGRU_ATTENTION DNNL_ARG_SRC_3 + +/// Destination argument #0. +#define DNNL_ARG_DST_0 17 +/// A special mnemonic for destination argument for primitives that have a +/// single destination. An alias for #DNNL_ARG_DST_0. +#define DNNL_ARG_DST DNNL_ARG_DST_0 +/// A special mnemonic for reorder destination argument. An alias for +/// #DNNL_ARG_DST_0. +#define DNNL_ARG_TO DNNL_ARG_DST_0 +/// A special mnemonic for RNN output vector. An alias for #DNNL_ARG_DST_0. +#define DNNL_ARG_DST_LAYER DNNL_ARG_DST_0 + +/// Destination argument #1. +#define DNNL_ARG_DST_1 18 +/// A special mnemonic for RNN output recurrent hidden state vector. An +/// alias for #DNNL_ARG_DST_1. +#define DNNL_ARG_DST_ITER DNNL_ARG_DST_1 + +/// Destination argument #2. +#define DNNL_ARG_DST_2 19 +/// A special mnemonic for LSTM output recurrent cell state vector. An +/// alias for #DNNL_ARG_DST_2. +#define DNNL_ARG_DST_ITER_C DNNL_ARG_DST_2 + +/// Weights argument #0. +#define DNNL_ARG_WEIGHTS_0 33 +/// A special mnemonic for primitives that have a single weights +/// argument. An alias for #DNNL_ARG_WEIGHTS_0. +#define DNNL_ARG_WEIGHTS DNNL_ARG_WEIGHTS_0 +/// A special mnemonic for RNN weights applied to the layer input. An +/// alias for #DNNL_ARG_WEIGHTS_0. +#define DNNL_ARG_WEIGHTS_LAYER DNNL_ARG_WEIGHTS_0 + +/// Weights argument #1. +#define DNNL_ARG_WEIGHTS_1 34 +/// A special mnemonic for RNN weights applied to the recurrent input. +/// An alias for #DNNL_ARG_WEIGHTS_1. +#define DNNL_ARG_WEIGHTS_ITER DNNL_ARG_WEIGHTS_1 + +/// Weights argument #2. +#define DNNL_ARG_WEIGHTS_2 35 +/// A special mnemonic for the RNN peephole weights. +/// An alias for #DNNL_ARG_WEIGHTS_2.
+#define DNNL_ARG_WEIGHTS_PEEPHOLE DNNL_ARG_WEIGHTS_2 + +/// Weights argument #3. +#define DNNL_ARG_WEIGHTS_3 36 +/// A special mnemonic for the RNN projection weights. +/// An alias for #DNNL_ARG_WEIGHTS_3. +#define DNNL_ARG_WEIGHTS_PROJECTION DNNL_ARG_WEIGHTS_3 + +/// Bias tensor argument. +#define DNNL_ARG_BIAS 41 + +/// Mean values tensor argument. +#define DNNL_ARG_MEAN 49 +/// Variance values tensor argument. +#define DNNL_ARG_VARIANCE 50 + +/// A special mnemonic for the scale argument of normalization primitives. +#define DNNL_ARG_SCALE 51 +/// A special mnemonic for the shift argument of normalization primitives. +#define DNNL_ARG_SHIFT 52 + +/// Workspace tensor argument. Workspace is used to pass information +/// from forward propagation to backward propagation computations. +#define DNNL_ARG_WORKSPACE 64 +/// Scratchpad (temporary storage) tensor argument. +#define DNNL_ARG_SCRATCHPAD 80 + +/// Gradient (diff) of the source argument #0. +#define DNNL_ARG_DIFF_SRC_0 129 +/// A special mnemonic for primitives that have a single diff source argument. +/// An alias for #DNNL_ARG_DIFF_SRC_0. +#define DNNL_ARG_DIFF_SRC DNNL_ARG_DIFF_SRC_0 +/// A special mnemonic for gradient (diff) of RNN input vector. An alias for +/// #DNNL_ARG_DIFF_SRC_0. +#define DNNL_ARG_DIFF_SRC_LAYER DNNL_ARG_DIFF_SRC_0 + +/// Gradient (diff) of the source argument #1. +#define DNNL_ARG_DIFF_SRC_1 130 +/// A special mnemonic for gradient (diff) of RNN input recurrent hidden state +/// vector. An alias for #DNNL_ARG_DIFF_SRC_1. +#define DNNL_ARG_DIFF_SRC_ITER DNNL_ARG_DIFF_SRC_1 + +/// Gradient (diff) of the source argument #2. +#define DNNL_ARG_DIFF_SRC_2 131 +/// A special mnemonic for gradient (diff) of RNN input recurrent cell state +/// vector. An alias for #DNNL_ARG_DIFF_SRC_2. +#define DNNL_ARG_DIFF_SRC_ITER_C DNNL_ARG_DIFF_SRC_2 + +/// Gradient (diff) of the source argument #3. +#define DNNL_ARG_DIFF_SRC_3 132 +/// A special mnemonic for gradient (diff) of RNN input recurrent cell attention +/// vector. An alias for #DNNL_ARG_DIFF_SRC_3. +#define DNNL_ARG_DIFF_AUGRU_ATTENTION DNNL_ARG_DIFF_SRC_3 + +/// Gradient (diff) of the destination argument #0. +#define DNNL_ARG_DIFF_DST_0 145 +/// A special mnemonic for primitives that have a single diff destination +/// argument. An alias for #DNNL_ARG_DIFF_DST_0. +#define DNNL_ARG_DIFF_DST DNNL_ARG_DIFF_DST_0 +/// A special mnemonic for gradient (diff) of RNN output vector. An alias for +/// #DNNL_ARG_DIFF_DST_0. +#define DNNL_ARG_DIFF_DST_LAYER DNNL_ARG_DIFF_DST_0 + +/// Gradient (diff) of the destination argument #1. +#define DNNL_ARG_DIFF_DST_1 146 +/// A special mnemonic for gradient (diff) of RNN output recurrent hidden state +/// vector. An alias for #DNNL_ARG_DIFF_DST_1. +#define DNNL_ARG_DIFF_DST_ITER DNNL_ARG_DIFF_DST_1 + +/// Gradient (diff) of the destination argument #2. +#define DNNL_ARG_DIFF_DST_2 147 +/// A special mnemonic for gradient (diff) of LSTM output recurrent cell state +/// vector. An alias for #DNNL_ARG_DIFF_DST_2. +#define DNNL_ARG_DIFF_DST_ITER_C DNNL_ARG_DIFF_DST_2 + +/// Gradient (diff) of the weights argument #0. +#define DNNL_ARG_DIFF_WEIGHTS_0 161 +/// A special mnemonic for primitives that have a single diff weights +/// argument. An alias for #DNNL_ARG_DIFF_WEIGHTS_0. +#define DNNL_ARG_DIFF_WEIGHTS DNNL_ARG_DIFF_WEIGHTS_0 +/// A special mnemonic for diff of RNN weights applied to the layer input. An +/// alias for #DNNL_ARG_DIFF_WEIGHTS_0.
+#define DNNL_ARG_DIFF_WEIGHTS_LAYER DNNL_ARG_DIFF_WEIGHTS_0
+
+/// Gradient (diff) of the weights argument #1.
+#define DNNL_ARG_DIFF_WEIGHTS_1 162
+/// A special mnemonic for diff of RNN weights applied to the recurrent input.
+/// An alias for #DNNL_ARG_DIFF_WEIGHTS_1.
+#define DNNL_ARG_DIFF_WEIGHTS_ITER DNNL_ARG_DIFF_WEIGHTS_1
+
+/// Gradient (diff) of the weights argument #2.
+#define DNNL_ARG_DIFF_WEIGHTS_2 163
+/// A special mnemonic for diff of RNN weights applied to the peephole weights.
+/// An alias for #DNNL_ARG_DIFF_WEIGHTS_2.
+#define DNNL_ARG_DIFF_WEIGHTS_PEEPHOLE DNNL_ARG_DIFF_WEIGHTS_2
+
+/// Gradient (diff) of the weights argument #3.
+#define DNNL_ARG_DIFF_WEIGHTS_3 164
+/// A special mnemonic for diff of RNN weights applied to the projection
+/// weights. An alias for #DNNL_ARG_DIFF_WEIGHTS_3.
+#define DNNL_ARG_DIFF_WEIGHTS_PROJECTION DNNL_ARG_DIFF_WEIGHTS_3
+
+/// Gradient (diff) of the bias tensor argument.
+#define DNNL_ARG_DIFF_BIAS 169
+
+/// A special mnemonic for gradient (diff) of scale argument of normalization
+/// primitives.
+#define DNNL_ARG_DIFF_SCALE 255
+/// A special mnemonic for gradient (diff) of shift argument of normalization
+/// primitives.
+#define DNNL_ARG_DIFF_SHIFT 256
+
+/// Rounding mode seed for stochastic rounding. A single seed is needed
+/// regardless of how many arguments require stochastic rounding.
+#define DNNL_ARG_ATTR_ROUNDING_SEED 508
+
+/// Dropout mask output buffer.
+#define DNNL_ARG_ATTR_DROPOUT_MASK 509
+
+/// Dropout probability value passed via a buffer.
+#define DNNL_ARG_ATTR_DROPOUT_PROBABILITY 510
+
+/// Dropout RNG seed value passed via a buffer.
+#define DNNL_ARG_ATTR_DROPOUT_SEED 511
+
+/// Output scaling factors provided at execution time.
+#define DNNL_ARG_ATTR_OUTPUT_SCALES 513
+
+/// Starting index for source arguments for primitives that take a variable
+/// number of source arguments.
+#define DNNL_ARG_MULTIPLE_SRC 1024
+/// Starting index for destination arguments for primitives that produce a
+/// variable number of destination arguments.
+#define DNNL_ARG_MULTIPLE_DST 2048
+
+/// Scaling factors provided at execution time.
+#define DNNL_ARG_ATTR_SCALES 4096
+
+/// Zero points provided at execution time.
+#define DNNL_ARG_ATTR_ZERO_POINTS 8192
+
+/// Arguments for fused depthwise convolution.
+/// See @ref dev_guide_attributes_post_ops_depthwise_fusion
+#define DNNL_ARG_ATTR_POST_OP_DW 16384
+
+/// Starting point for a binary post operation.
+#define DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE 32768
+
+/// Arguments for a binary post operation. Up to 32 arguments are supported.
+/// See @ref dev_guide_attributes_post_ops_binary_fusion
+#define DNNL_ARG_ATTR_MULTIPLE_POST_OP(idx) \
+    (DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE * ((idx) + 1))
+
+/// A structure that contains an index and a memory object, and is used to pass
+/// arguments to dnnl_primitive_execute().
+typedef struct {
+    int arg; ///< An argument index, e.g. DNNL_ARG_SRC
+    dnnl_memory_t memory; ///< Input/output memory
+} dnnl_exec_arg_t;
+
+/// @} dnnl_api_primitives_common
+
+/// @addtogroup dnnl_api_primitives_common
+/// @{
+
+/// Primitive descriptor query specification
+///
+/// For generic function dnnl_primitive_desc_query(), the type of result must
+/// agree with the queried argument.
The correspondence table: +/// +/// Query kind | Type of query result +/// --------------------------------|----------------------------- +/// dnnl_query_*_engine | #dnnl_engine_t * +/// #dnnl_query_primitive_kind | #dnnl_primitive_kind_t * +/// dnnl_query_*_s32 | int * +/// dnnl_query_*_s64 | #dnnl_dim_t * (same as int64_t *) +/// dnnl_query_*_f32 | float * +/// dnnl_query_*_f64 | double * +/// dnnl_query_*_str | const char ** +/// dnnl_query_*_md | #const_dnnl_memory_desc_t * +/// dnnl_query_*_pd | #const_dnnl_primitive_desc_t * +/// dnnl_query_cache_blob_id | const uint8_t ** +/// dnnl_query_strides | const #dnnl_dims_t ** +/// dnnl_query_dilations | const #dnnl_dims_t ** +/// dnnl_query_padding_l | const #dnnl_dims_t ** +/// dnnl_query_padding_r | const #dnnl_dims_t ** +/// dnnl_query_flags | unsigned * +/// dnnl_query_alg_kind | #dnnl_alg_kind_t * +/// dnnl_query_factors | const float ** +/// dnnl_query_cell_kind | #dnnl_alg_kind_t * +/// dnnl_query_direction | #dnnl_rnn_direction_t * +/// dnnl_query_activation_kind | #dnnl_alg_kind_t * +/// dnnl_query_kernel | const #dnnl_dims_t ** +/// dnnl_query_dims | const #dnnl_dims_t ** +/// dnnl_query_data_type | #dnnl_data_type_t * +/// dnnl_query_padded_dims | const #dnnl_dims_t ** +/// dnnl_query_padded_offsets | const #dnnl_dims_t ** +/// dnnl_query_format_kind | #dnnl_format_kind_t * +/// dnnl_query_inner_blks | const #dnnl_dims_t ** +/// dnnl_query_inner_idxs | const #dnnl_dims_t ** +/// dnnl_query_sparse_encoding | #dnnl_sparse_encoding_t * +/// +/// @note +/// Rule of thumb: all opaque types and structures are returned by +/// reference. All numbers are returned by value. +/// +/// @warning +/// All returned references point to constant objects and are valid only +/// during the lifetime of the queried primitive descriptor. Returned objects +/// must not be destroyed by the user. If you need to keep the object longer +/// than the lifetime of the queried primitive descriptor, use +/// dnnl_primitive_desc_clone() to make a copy. 
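+///
+/// For illustration, assuming `pd` is a valid primitive descriptor handle,
+/// a by-value number and a by-reference memory descriptor can be queried as
+/// follows (per the table above):
+///
+/// @code
+/// int n_inputs = 0;
+/// dnnl_primitive_desc_query(pd, dnnl_query_num_of_inputs_s32, 0, &n_inputs);
+///
+/// const_dnnl_memory_desc_t src_md = NULL; // owned by pd; do not destroy
+/// dnnl_primitive_desc_query(pd, dnnl_query_src_md, 0, &src_md);
+/// @endcode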
+typedef enum {
+    dnnl_query_undef = 0, ///< no query
+
+    dnnl_query_engine, ///< execution engine
+    dnnl_query_primitive_kind, ///< primitive kind
+
+    dnnl_query_num_of_inputs_s32, ///< number of inputs expected
+    dnnl_query_num_of_outputs_s32, ///< number of outputs expected
+
+    dnnl_query_time_estimate_f64, ///< runtime estimation (seconds)
+    dnnl_query_memory_consumption_s64, ///< memory consumption -- extra
+    ///  (scratch) memory, additional to
+    ///  all inputs and outputs memory
+    ///  (bytes)
+
+    dnnl_query_scratchpad_engine, ///< scratchpad engine -- engine to be used
+    ///  for creating scratchpad memory
+
+    dnnl_query_impl_info_str, ///< implementation name
+
+    dnnl_query_reorder_src_engine, ///< source engine
+    dnnl_query_reorder_dst_engine, ///< destination engine
+
+    dnnl_query_prop_kind, ///< propagation kind
+
+    dnnl_query_cache_blob_id_size_s64, ///< size of cache blob ID in bytes
+    dnnl_query_cache_blob_id, ///< cache blob ID (pointer to array)
+
+    dnnl_query_strides, ///< strides
+    dnnl_query_dilations, ///< dilations
+    dnnl_query_padding_l, ///< left padding
+    dnnl_query_padding_r, ///< right padding
+    dnnl_query_epsilon_f32, ///< epsilon
+    dnnl_query_flags, ///< flags
+    dnnl_query_alg_kind, ///< algorithm kind
+    dnnl_query_alpha_f32, ///< alpha
+    dnnl_query_beta_f32, ///< beta
+    dnnl_query_axis_s32, ///< axis
+    dnnl_query_local_size_s64, ///< LRN parameter local size
+    dnnl_query_k_f32, ///< LRN parameter K
+    dnnl_query_p_f32, ///< Reduction parameter P
+    dnnl_query_factors, ///< Resampling parameter factors
+    dnnl_query_cell_kind, ///< RNN parameter cell kind
+    dnnl_query_direction, ///< RNN parameter direction
+    dnnl_query_activation_kind, ///< RNN parameter activation kind
+    dnnl_query_kernel, ///< Pooling parameter kernel
+    dnnl_query_group_size_s64, ///< Shuffle parameter group size
+
+    // memory descriptor section
+    dnnl_query_some_md = 128, ///< stub
+    dnnl_query_src_md, ///< source memory desc
+    dnnl_query_diff_src_md, ///< source gradient memory desc
+    dnnl_query_weights_md, ///< weights memory desc
+    dnnl_query_diff_weights_md, ///< weights grad. memory desc
+    dnnl_query_dst_md, ///< destination memory desc
+    dnnl_query_diff_dst_md, ///< destination grad.
memory desc + dnnl_query_workspace_md, ///< workspace memory desc + dnnl_query_scratchpad_md, ///< scratchpad memory desc + dnnl_query_exec_arg_md = 255, ///< memory desc of an execute argument + + dnnl_query_ndims_s32, ///< number of dimensions + dnnl_query_dims, ///< vector of dimensions + dnnl_query_data_type, ///< data type + dnnl_query_submemory_offset_s64, ///< submemory offset + dnnl_query_padded_dims, ///< vector of padded dimensions + dnnl_query_padded_offsets, ///< vector of padded offsets + dnnl_query_format_kind, ///< format kind + dnnl_query_inner_nblks_s32, ///< number of innermost blocks + dnnl_query_inner_blks, ///< vector of sizes of the innermost blocks + dnnl_query_inner_idxs, ///< vector of logical indices of the blocks +#ifdef DNNL_EXPERIMENTAL_SPARSE + dnnl_query_sparse_encoding, ///< Sparse encoding + dnnl_query_nnz_s64, ///< Number of non-zero entries + dnnl_query_num_handles_s32, ///< Number of buffers required for a memory +/// descriptor +#endif + // Max value to prevent UB for internal use only dnnl_query_t + dnnl_query_max = 0x7fff, +} dnnl_query_t; + +/// @} dnnl_api_primitives_common + +/// @} dnnl_api_primitives + +/// @addtogroup dnnl_api_service +/// @{ + +/// Disable profiling completely +#define DNNL_JIT_PROFILE_NONE 0u + +/// Enable VTune Profiler integration +#define DNNL_JIT_PROFILE_VTUNE 1u + +/// Enable Linux perf integration via perfmap files +#define DNNL_JIT_PROFILE_LINUX_PERFMAP 2u + +/// Enable Linux perf integration via jitdump files +#define DNNL_JIT_PROFILE_LINUX_JITDUMP 4u + +/// Instruct Linux perf integration via jitdump files to use TSC. @ref +/// DNNL_JIT_PROFILE_LINUX_JITDUMP must be set too for this to take effect. +#define DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u + +/// Enable Linux perf integration (both jitdump and perfmap) +#define DNNL_JIT_PROFILE_LINUX_PERF \ + (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP) + +/// CPU instruction set flags +typedef enum { + /// Library choice of ISA (excepting those listed as initial support) + dnnl_cpu_isa_default = 0x0, + + /// Intel Streaming SIMD Extensions 4.1 (Intel SSE4.1) + dnnl_cpu_isa_sse41 = 0x1, + + /// Intel Advanced Vector Extensions (Intel AVX) + dnnl_cpu_isa_avx = 0x3, + + /// Intel Advanced Vector Extensions 2 (Intel AVX2) + dnnl_cpu_isa_avx2 = 0x7, + + /// Intel AVX2 and Intel Deep Learning Boost (Intel DL Boost) support + dnnl_cpu_isa_avx2_vnni = 0xf, + + /// Intel AVX2 and Intel Deep Learning Boost (Intel DL Boost) + /// with 8-bit integer, float16 and bfloat16 support + dnnl_cpu_isa_avx2_vnni_2 = 0x1f, + + /// Intel AVX-512 subset for Intel Xeon Scalable processor family + /// and Intel Core processor family. + dnnl_cpu_isa_avx512_core = 0x27, + + /// Intel AVX-512 and Intel Deep Learning Boost (Intel DL Boost) support + /// for Intel Xeon Scalable processor family + /// and Intel Core processor family. + dnnl_cpu_isa_avx512_core_vnni = 0x67, + + /// Intel AVX-512, Intel DL Boost and bfloat16 support + /// for Intel Xeon Scalable processor family + /// and Intel Core processor family. + dnnl_cpu_isa_avx512_core_bf16 = 0xe7, + + /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support + /// for Intel Xeon Scalable processor family + /// and Intel Core processor family. + // TODO: Align avx10_1 values to internal representation. 
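+    // Note: the enumerator values form nested bit masks; e.g.
+    // dnnl_cpu_isa_avx2 (0x7) contains all bits of dnnl_cpu_isa_avx (0x3),
+    // which in turn contains dnnl_cpu_isa_sse41 (0x1).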
+ dnnl_cpu_isa_avx10_1_512 = 0x1ef, + /// @copydoc dnnl_cpu_isa_avx10_1_512 + dnnl_cpu_isa_avx512_core_fp16 = dnnl_cpu_isa_avx10_1_512, + + /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support and + /// Intel AMX with 8-bit integer and bfloat16 support + // TODO: Align avx10_1 values to internal representation. + dnnl_cpu_isa_avx10_1_512_amx = 0xfef, + /// @copydoc dnnl_cpu_isa_avx10_1_512_amx + dnnl_cpu_isa_avx512_core_amx = dnnl_cpu_isa_avx10_1_512_amx, + + /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support and + /// Intel AMX with 8-bit integer, bfloat16 and float16 support + // TODO: Align avx10_1 values to internal representation. + dnnl_cpu_isa_avx10_1_512_amx_fp16 = 0x1fef, + /// @copydoc dnnl_cpu_isa_avx10_1_512_amx_fp16 + dnnl_cpu_isa_avx512_core_amx_fp16 = dnnl_cpu_isa_avx10_1_512_amx_fp16, +} dnnl_cpu_isa_t; + +/// CPU ISA hints flags +typedef enum { + /// No hints (use default features) + dnnl_cpu_isa_no_hints = 0x0, + + /// Prefer to exclusively use Ymm registers for computations + dnnl_cpu_isa_prefer_ymm = 0x1, +} dnnl_cpu_isa_hints_t; + +/// @} dnnl_api_service + +/// @} dnnl_api + +#ifdef __cplusplus +} +#endif + +#endif /* ONEAPI_DNNL_TYPES_H */ diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel.h new file mode 100644 index 0000000000000000000000000000000000000000..4d768c8674ffcad62ad63b6890aeda0d57472bb2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel.h @@ -0,0 +1,337 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/// @file +/// ukernel C API + +#ifndef ONEAPI_DNNL_DNNL_UKERNEL_H +#define ONEAPI_DNNL_DNNL_UKERNEL_H + +#include "oneapi/dnnl/dnnl.h" +#include "oneapi/dnnl/dnnl_ukernel_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// @addtogroup dnnl_api +/// @{ + +/// @addtogroup dnnl_api_ukernel +/// @{ + +#ifdef DNNL_EXPERIMENTAL_UKERNEL + +/// Creates a ukernel attributes memory storage. +/// +/// @param attr_params Output ukernel attributes memory storage. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_ukernel_attr_params_create( + dnnl_ukernel_attr_params_t *attr_params); + +/// Sets post-operations arguments to a storage. +/// +/// @param attr_params Memory pointers storage object. +/// @param post_ops_args A pointer to pointers of post_ops storages. Expected to +/// be packed together. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_post_ops_args( + dnnl_ukernel_attr_params_t attr_params, const void **post_ops_args); + +/// Sets tensor A scales argument to a storage. +/// +/// @param attr_params Memory pointers storage object. 
+/// @param a_scales Pointer to the scales storage.
+/// @returns #dnnl_success on success and a status describing the error
+/// otherwise.
+dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_A_scales(
+        dnnl_ukernel_attr_params_t attr_params, const void *a_scales);
+
+/// Sets tensor B scales argument to a storage.
+///
+/// If `dnnl_brgemm_set_B_scales` was called with a mask of 2, then at least N
+/// values of the selected data type are expected.
+///
+/// @param attr_params Memory pointers storage object.
+/// @param b_scales Pointer to the scales storage.
+/// @returns #dnnl_success on success and a status describing the error
+/// otherwise.
+dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_B_scales(
+        dnnl_ukernel_attr_params_t attr_params, const void *b_scales);
+
+/// Sets tensor D scales argument to a storage.
+///
+/// @param attr_params Memory pointers storage object.
+/// @param d_scales Pointer to the scales storage.
+/// @returns #dnnl_success on success and a status describing the error
+/// otherwise.
+dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_D_scales(
+        dnnl_ukernel_attr_params_t attr_params, const void *d_scales);
+
+/// Destroys a ukernel attributes memory storage.
+///
+/// @param attr_params Memory pointers storage object to destroy.
+/// @returns #dnnl_success on success and a status describing the error
+/// otherwise.
+dnnl_status_t DNNL_API dnnl_ukernel_attr_params_destroy(
+        dnnl_ukernel_attr_params_t attr_params);
+
+/// @addtogroup dnnl_api_ukernel_brgemm
+/// @{
+
+/// Creates a BRGeMM ukernel object. Operates by the following formula:
+/// `C = [A x B]`.
+///
+/// @param brgemm Output BRGeMM ukernel object.
+/// @param M Dimension M of tensor A.
+/// @param N Dimension N of tensor B.
+/// @param K Dimension K of tensors A and B.
+/// @param batch_size Number of batches to process.
+/// @param lda Leading dimension of tensor A.
+/// @param ldb Leading dimension of tensor B.
+/// @param ldc Leading dimension of tensor C.
+/// @param a_dt Data type of tensor A.
+/// @param b_dt Data type of tensor B.
+/// @param c_dt Data type of tensor C. Must be dnnl_f32.
+/// @returns #dnnl_success on success and a status describing the error
+/// otherwise.
+dnnl_status_t DNNL_API dnnl_brgemm_create(dnnl_brgemm_t *brgemm, dnnl_dim_t M,
+        dnnl_dim_t N, dnnl_dim_t K, dnnl_dim_t batch_size, dnnl_dim_t lda,
+        dnnl_dim_t ldb, dnnl_dim_t ldc, dnnl_data_type_t a_dt,
+        dnnl_data_type_t b_dt, dnnl_data_type_t c_dt);
+
+/// Sets adding an intermediate result to the output tensor C instead of
+/// writing: `C += [A x B]`.
+///
+/// @param brgemm BRGeMM ukernel object.
+/// @param add_C Value to indicate addition. Can be `0` to skip addition, and
+/// `1` to apply addition.
+/// @returns #dnnl_success on success and a status describing the error
+/// otherwise.
+dnnl_status_t DNNL_API dnnl_brgemm_set_add_C(dnnl_brgemm_t brgemm, int add_C);
+
+/// Sets post-operations to a BRGeMM ukernel object: `D = post-operations(C)`.
+///
+/// Post-operations apply if one of the following holds:
+/// * Non-empty attributes are specified.
+/// * Output data type `d_dt` is different from accumulation data type `c_dt`.
+///
+/// If either condition holds, the final call in the accumulation chain must
+/// be `dnnl_brgemm_execute_postops`; otherwise, it must be
+/// `dnnl_brgemm_execute`.
+///
+/// @param brgemm BRGeMM ukernel object.
+/// @param ldd Leading dimension of tensor D.
+/// @param d_dt Data type of tensor D.
+/// @param post_ops Primitive post operations attribute to extend the kernel +/// operations. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_set_post_ops(dnnl_brgemm_t brgemm, + dnnl_dim_t ldd, dnnl_data_type_t d_dt, const_dnnl_post_ops_t post_ops); + +/// Sets tensor A scales mask to a BRGeMM ukernel object. +/// +/// For quantization flavor tensor A scales apply to accumulation buffer once C +/// is ready. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param a_scale_mask Tensor A scale mask. Can be `0` only. +dnnl_status_t DNNL_API dnnl_brgemm_set_A_scales( + dnnl_brgemm_t brgemm, int a_scale_mask); + +/// Sets tensor B scales mask to a BRGeMM ukernel object. +/// +/// For quantization flavor tensor B scales apply to accumulation buffer once C +/// is ready. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param b_scale_mask Tensor B scale mask. Can be `0` and `2` only. +dnnl_status_t DNNL_API dnnl_brgemm_set_B_scales( + dnnl_brgemm_t brgemm, int b_scale_mask); + +/// Sets tensor D scales mask to a BRGeMM ukernel object. +/// +/// For quantization flavor tensor D scales apply after all post-ops are +/// applied. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param d_scale_mask Tensor D scale mask. Can be `0` only. +dnnl_status_t DNNL_API dnnl_brgemm_set_D_scales( + dnnl_brgemm_t brgemm, int d_scale_mask); + +/// Finalizes initialization of a BRGeMM ukernel object. +/// +/// This step is mandatory to query information from the object. +/// +/// @param brgemm Output BRGeMM ukernel object. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_finalize(dnnl_brgemm_t brgemm); + +/// Returns the packing type expected by a tensor B of a BRGeMM ukernel object. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param pack_type Output packing type. Can be `dnnl_brgemm_no_pack` if +/// packing is not expected, and `dnnl_brgemm_pack_32`, otherwise. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_get_B_pack_type( + const_dnnl_brgemm_t brgemm, dnnl_pack_type_t *pack_type); + +/// Returns the size of a scratchpad memory needed for the BRGeMM ukernel +/// object. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param size Output size of a buffer required for the BRGeMM ukernel object. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_get_scratchpad_size( + const_dnnl_brgemm_t brgemm, size_t *size); + +/// Returns the flag indicating when the call to `dnnl_brgemm_execute_postops` +/// is valid. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param valid The flag indicating if `dnnl_brgemm_execute_postops` is valid +/// for a given ukernel object. `1` is for valid and `0`, otherwise. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_is_execute_postops_valid( + const_dnnl_brgemm_t brgemm, int *valid); + +/// Initializes the hardware-specific context. If no initialization required, +/// returns the success status. +/// +/// @param brgemm BRGeMM ukernel object. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_set_hw_context(const_dnnl_brgemm_t brgemm); + +/// Releases the hardware-specific context. 
Must be used after all the execution +/// calls to BRGeMM ukernel objects. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_release_hw_context(); + +/// Generates an executable part of BRGeMM ukernel object. +/// @param brgemm BRGeMM ukernel object. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_generate(dnnl_brgemm_t brgemm); + +/// Executes a BRGeMM ukernel object. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param A_ptr Base pointer to a tensor A. +/// @param B_ptr Base pointer to a tensor B. +/// @param A_B_offsets Pointer to the set of tensor A and tensor B offsets for +/// each batch; the set must be contiguous in memory. Single batch should +/// supply offsets for both tensors A and B simultaneously. The number of +/// batches must coincide with the `batch_size` value passed at the creation +/// stage. +/// @param C_ptr Pointer to a tensor C (accumulation buffer). +/// @param scratchpad_ptr Pointer to a scratchpad buffer. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_execute(const_dnnl_brgemm_t brgemm, + const void *A_ptr, const void *B_ptr, const dnnl_dim_t *A_B_offsets, + void *C_ptr, void *scratchpad_ptr); + +/// Executes a BRGeMM ukernel object with post operations. +/// +/// @param brgemm BRGeMM ukernel object. +/// @param A Base pointer to a tensor A. +/// @param B Base pointer to a tensor B. +/// @param A_B_offsets Pointer to a set of tensor A and tensor B offsets for +/// each batch. A set must be contiguous in memory. A single batch should +/// supply offsets for both tensors A and B simultaneously. The number of +/// batches must coincide with the `batch_size` value passed at the creation +/// stage. +/// @param C_ptr Pointer to a tensor C (accumulation buffer). +/// @param D_ptr Pointer to a tensor D (output buffer). +/// @param scratchpad_ptr Pointer to a scratchpad buffer. +/// @param attr_params Ukernel attributes memory storage. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_execute_postops(const_dnnl_brgemm_t brgemm, + const void *A, const void *B, const dnnl_dim_t *A_B_offsets, + const void *C_ptr, void *D_ptr, void *scratchpad_ptr, + const_dnnl_ukernel_attr_params_t attr_params); + +/// Destroys a BRGeMM ukernel object. +/// +/// @param brgemm BRGeMM ukernel object to destroy. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_brgemm_destroy(dnnl_brgemm_t brgemm); + +/// Creates a transform object. +/// +/// @param transform Output transform object. +/// @param K Dimension K. +/// @param N Dimension N. +/// @param in_pack_type Input packing type. Must be one of +/// `dnnl_pack_type_no_trans`, or `dnnl_pack_type_trans`. +/// @param in_ld Input leading dimension. +/// @param out_ld Output leading dimension. When packing data, it specifies a +/// block by N dimension. +/// @param in_dt Input data type. +/// @param out_dt Output data type. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. 
+dnnl_status_t DNNL_API dnnl_transform_create(dnnl_transform_t *transform, + dnnl_dim_t K, dnnl_dim_t N, dnnl_pack_type_t in_pack_type, + dnnl_dim_t in_ld, dnnl_dim_t out_ld, dnnl_data_type_t in_dt, + dnnl_data_type_t out_dt); + +/// Generates an executable part of transform object. +/// @param transform Transform object. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_transform_generate(dnnl_transform_t transform); + +/// Executes a transform object. +/// +/// @param transform Transform object. +/// @param in_ptr Pointer to an input buffer. +/// @param out_ptr Pointer to an output buffer. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_transform_execute( + const_dnnl_transform_t transform, const void *in_ptr, void *out_ptr); + +/// Destroys a transform object. +/// +/// @param transform Transform object. +/// @returns #dnnl_success on success and a status describing the error +/// otherwise. +dnnl_status_t DNNL_API dnnl_transform_destroy(dnnl_transform_t transform); + +/// @} dnnl_api_ukernel_brgemm + +#endif + +/// @} dnnl_api_ukernel + +/// @} dnnl_api + +#ifdef __cplusplus +} +#endif + +#endif /* ONEAPI_DNNL_DNNL_UKERNEL_H */ diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel.hpp b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ffc583e9bc1012b04778c7d56bfa96f833f98216 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel.hpp @@ -0,0 +1,465 @@ +/******************************************************************************* +* Copyright 2024-2025 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/
+
+/// @file
+/// ukernel C++ API
+
+#ifndef ONEAPI_DNNL_DNNL_UKERNEL_HPP
+#define ONEAPI_DNNL_DNNL_UKERNEL_HPP
+
+#include "oneapi/dnnl/dnnl.hpp"
+#include "oneapi/dnnl/dnnl_ukernel.h"
+
+/// @addtogroup dnnl_api oneDNN API
+/// @{
+
+/// oneDNN namespace
+namespace dnnl {
+
+#ifdef DNNL_EXPERIMENTAL_UKERNEL
+
+/// @addtogroup dnnl_api_utils
+/// @{
+
+/// @cond DO_NOT_DOCUMENT_THIS
+
+template <>
+struct handle_traits<dnnl_brgemm_t> {
+    static dnnl_status_t destructor(dnnl_brgemm_t p) {
+        return dnnl_brgemm_destroy(p);
+    }
+};
+
+template <>
+struct handle_traits<dnnl_transform_t> {
+    static dnnl_status_t destructor(dnnl_transform_t p) {
+        return dnnl_transform_destroy(p);
+    }
+};
+
+template <>
+struct handle_traits<dnnl_ukernel_attr_params_t> {
+    static dnnl_status_t destructor(dnnl_ukernel_attr_params_t p) {
+        return dnnl_ukernel_attr_params_destroy(p);
+    }
+};
+
+/// @endcond
+
+/// @} dnnl_api_utils
+
+#endif
+
+/// @addtogroup dnnl_api_ukernel Ukernels
+/// Collection of ukernels
+/// @{
+
+/// ukernel namespace
+namespace ukernel {
+
+#ifdef DNNL_EXPERIMENTAL_UKERNEL
+
+/// @addtogroup dnnl_api_ukernel_utils ukernel utils
+/// ukernel utility functions
+/// @{
+
+/// Packing specification
+enum class pack_type {
+    /// Undefined pack type. A guard value.
+    undef = dnnl_pack_type_undef,
+    /// Plain, not transposed layout. Similar to format_tag::ab.
+    no_trans = dnnl_pack_type_no_trans,
+    /// Plain, transposed layout. Similar to format_tag::ba.
+    trans = dnnl_pack_type_trans,
+    /// Packed by 32 bits along K dimension layout.
+    pack32 = dnnl_pack_type_pack32,
+};
+
+/// Ukernel attributes memory storage
+struct attr_params : public handle<dnnl_ukernel_attr_params_t> {
+    /// Constructs a ukernel attributes memory storage.
+    attr_params() {
+        dnnl_ukernel_attr_params_t c_params = nullptr;
+        dnnl_status_t status = dnnl_ukernel_attr_params_create(&c_params);
+        error::wrap_c_api(
+                status, "could not create an attributes memory storage");
+        reset(c_params);
+    }
+
+    /// Sets post-operations arguments to a storage.
+    ///
+    /// @param post_ops_args Pointer to pointers of post_ops storages.
+    ///     Expected to be packed together.
+    void set_post_ops_args(const void **post_ops_args) {
+        dnnl_status_t status = dnnl_ukernel_attr_params_set_post_ops_args(
+                get(), post_ops_args);
+        if (status != dnnl_success)
+            error::wrap_c_api(
+                    status, "could not set post operations arguments");
+    }
+
+    /// Sets tensor A scales arguments to a storage.
+    ///
+    /// @param a_scales Pointer to scales storage.
+    void set_A_scales(const void *a_scales) {
+        dnnl_status_t status
+                = dnnl_ukernel_attr_params_set_A_scales(get(), a_scales);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set A scales argument");
+    }
+
+    /// Sets tensor B scales arguments to a storage.
+    ///
+    /// If @ref brgemm::set_B_scales was called with a mask of 2, then at
+    /// least N values of the selected data type are expected.
+    ///
+    /// @param b_scales Pointer to scales storage.
+    void set_B_scales(const void *b_scales) {
+        dnnl_status_t status
+                = dnnl_ukernel_attr_params_set_B_scales(get(), b_scales);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set B scales argument");
+    }
+
+    /// Sets tensor D scales arguments to a storage.
+    ///
+    /// @param d_scales Pointer to scales storage.
+    void set_D_scales(const void *d_scales) {
+        dnnl_status_t status
+                = dnnl_ukernel_attr_params_set_D_scales(get(), d_scales);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set D scales argument");
+    }
+};
+/// @} dnnl_api_ukernel_utils
+
+/// @addtogroup dnnl_api_ukernel_brgemm BRGeMM ukernel
+/// BRGeMM ukernel routines
+/// @{
+
+/// BRGeMM ukernel
+struct brgemm : public handle<dnnl_brgemm_t> {
+    /// Default constructor. Produces an empty object.
+    brgemm() = default;
+
+    /// Constructs a BRGeMM ukernel object. Operates by the following formula:
+    /// `C = [A x B]`.
+    ///
+    /// @param M Dimension M of tensor A.
+    /// @param N Dimension N of tensor B.
+    /// @param K Dimension K of tensors A and B.
+    /// @param batch_size Number of batches to process.
+    /// @param lda Leading dimension of tensor A.
+    /// @param ldb Leading dimension of tensor B.
+    /// @param ldc Leading dimension of tensor C.
+    /// @param a_dt Data type of tensor A.
+    /// @param b_dt Data type of tensor B.
+    /// @param c_dt Data type of tensor C.
+    /// @param allow_empty A flag signifying whether construction is
+    ///     allowed to fail without throwing an exception. In this case an
+    ///     empty object will be produced. This flag is optional and
+    ///     defaults to false.
+    brgemm(memory::dim M, memory::dim N, memory::dim K, memory::dim batch_size,
+            memory::dim lda, memory::dim ldb, memory::dim ldc,
+            memory::data_type a_dt, memory::data_type b_dt,
+            memory::data_type c_dt, bool allow_empty = false) {
+
+        dnnl_brgemm_t brgemm = nullptr;
+        dnnl_status_t status = dnnl_brgemm_create(&brgemm, M, N, K, batch_size,
+                lda, ldb, ldc, memory::convert_to_c(a_dt),
+                memory::convert_to_c(b_dt), memory::convert_to_c(c_dt));
+
+        if (!allow_empty)
+            error::wrap_c_api(
+                    status, "could not create a BRGeMM ukernel object");
+        reset(brgemm);
+    }
+
+    /// Sets adding an intermediate result to the output tensor C instead of
+    /// writing: `C += [A x B]`.
+    ///
+    /// @param add_C Value to indicate addition. `false` to skip addition, and
+    ///     `true` to apply addition.
+    void set_add_C(bool add_C) {
+        dnnl_status_t status
+                = dnnl_brgemm_set_add_C(get(), static_cast<int>(add_C));
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set add_C attribute");
+    }
+
+    /// Sets post-operations to a BRGeMM ukernel object:
+    /// `D = post-operations(C)`.
+    ///
+    /// Post-operations apply if one of the following holds:
+    /// * Non-empty post-operations are specified.
+    /// * Output data type `d_dt` is different from accumulation data type
+    ///   `c_dt`.
+    ///
+    /// @param ldd Leading dimension of tensor D.
+    /// @param d_dt Data type of tensor D.
+    /// @param po Primitive post-operation attributes to extend the kernel
+    ///     operations.
+    void set_post_ops(memory::dim ldd, memory::data_type d_dt,
+            const post_ops &po = default_post_ops()) {
+        dnnl_status_t status = dnnl_brgemm_set_post_ops(
+                get(), ldd, memory::convert_to_c(d_dt), po.get());
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set post operations");
+    }
+
+    /// Sets tensor A scales mask to a BRGeMM ukernel object.
+    ///
+    /// For quantization flavor tensor A scales apply to accumulation buffer
+    /// once C is ready.
+    ///
+    /// @param a_scale_mask Tensor A scale mask. Can be `0` only.
+    void set_A_scales(int a_scale_mask) {
+        dnnl_status_t status = dnnl_brgemm_set_A_scales(get(), a_scale_mask);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set A scales");
+    }
+
+    /// Sets tensor B scales mask to a BRGeMM ukernel object.
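+    ///
+    /// Illustrative sketch (the `brg` object here is hypothetical): a mask
+    /// of `2` means at least N scale values are expected at execution time,
+    /// supplied later via attr_params::set_B_scales().
+    /// @code
+    /// brg.set_B_scales(/*b_scale_mask=*/2);
+    /// @endcode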
+    ///
+    /// For quantization flavor tensor B scales apply to accumulation buffer
+    /// once C is ready.
+    ///
+    /// @param b_scale_mask Tensor B scale mask. Can be `0` and `2` only.
+    void set_B_scales(int b_scale_mask) {
+        dnnl_status_t status = dnnl_brgemm_set_B_scales(get(), b_scale_mask);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set B scales");
+    }
+
+    /// Sets tensor D scales mask to a BRGeMM ukernel object.
+    ///
+    /// For quantization flavor tensor D scales apply after all post-ops are
+    /// applied.
+    ///
+    /// @param d_scale_mask Tensor D scale mask. Can be `0` only.
+    void set_D_scales(int d_scale_mask) {
+        dnnl_status_t status = dnnl_brgemm_set_D_scales(get(), d_scale_mask);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set D scales");
+    }
+
+    /// Finalizes initialization of a BRGeMM ukernel object.
+    ///
+    /// This step must be performed prior to querying information from the
+    /// object.
+    void finalize() {
+        dnnl_status_t status = dnnl_brgemm_finalize(get());
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not finalize an object");
+    }
+
+    /// Returns the packing type expected by a tensor B of a BRGeMM ukernel
+    /// object.
+    pack_type get_B_pack_type() const {
+        dnnl_pack_type_t c_pack_type;
+        dnnl_status_t status = dnnl_brgemm_get_B_pack_type(get(), &c_pack_type);
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not query B pack type");
+
+        return static_cast<pack_type>(c_pack_type);
+    }
+
+    /// Returns the size of a scratchpad memory needed for the BRGeMM ukernel
+    /// object.
+    size_t get_scratchpad_size() const {
+        size_t size;
+        dnnl_status_t status = dnnl_brgemm_get_scratchpad_size(get(), &size);
+        if (status != dnnl_success)
+            error::wrap_c_api(status,
+                    "could not query a scratchpad size from a BRGeMM ukernel "
+                    "object");
+        return size;
+    }
+
+    /// Returns the flag indicating when the call to execute with post
+    /// operations is valid.
+    ///
+    /// `True` is for a valid call, `false`, otherwise.
+    bool is_execute_postops_valid() const {
+        int valid;
+        dnnl_status_t status
+                = dnnl_brgemm_is_execute_postops_valid(get(), &valid);
+        if (status != dnnl_success)
+            error::wrap_c_api(status,
+                    "could not query a flag for execute postops from a BRGeMM "
+                    "ukernel object");
+        return static_cast<bool>(valid);
+    }
+
+    /// Initializes the hardware-specific context. Affects the global state for
+    /// all BRGeMM ukernel objects. If no initialization required, returns.
+    void set_hw_context() const {
+        dnnl_status_t status = dnnl_brgemm_set_hw_context(get());
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not set hardware context");
+    }
+
+    /// Releases the hardware-specific context. Affects the global state for
+    /// all BRGeMM ukernel objects. Must be used after all the execution calls
+    /// to BRGeMM ukernel objects.
+    static void release_hw_context() {
+        dnnl_status_t status = dnnl_brgemm_release_hw_context();
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not release hardware context");
+    }
+
+    /// Generates an executable part of BRGeMM ukernel object.
+    void generate() {
+        dnnl_status_t status = dnnl_brgemm_generate(get());
+        if (status != dnnl_success)
+            error::wrap_c_api(status, "could not generate a kernel");
+    }
+
+    /// Executes a BRGeMM ukernel object.
+    ///
+    /// @param A Base pointer to a tensor A.
+    /// @param B Base pointer to a tensor B.
+    /// @param A_B_offsets Vector of pairs of tensors A and B offsets for
+    ///     each batch.
The number of batches must coincide with the
+    ///     `batch_size` value passed at object construction stage.
+    /// @param C Pointer to a tensor C (accumulation buffer).
+    /// @param scratchpad Pointer to a scratchpad buffer.
+    void execute(const void *A, const void *B,
+            const std::vector<std::pair<memory::dim, memory::dim>> &A_B_offsets,
+            void *C, void *scratchpad) const {
+        // TODO: export batch_element to C API later for user to fill it and
+        // pass directly to the call.
+        dnnl_status_t status = dnnl_brgemm_execute(get(), A, B,
+                (const dnnl_dim_t *)A_B_offsets.data(), C, scratchpad);
+        if (status != dnnl_success)
+            error::wrap_c_api(
+                    status, "could not execute a BRGeMM ukernel object");
+    }
+
+    /// Executes a BRGeMM ukernel object with post operations.
+    ///
+    /// @param A Base pointer to a tensor A.
+    /// @param B Base pointer to a tensor B.
+    /// @param A_B_offsets Vector of pairs of tensors A and B offsets for
+    ///     each batch. The number of batches must coincide with the
+    ///     `batch_size` value passed at object construction stage.
+    /// @param C Pointer to a tensor C (accumulation buffer).
+    /// @param D Pointer to a tensor D (output buffer).
+    /// @param scratchpad Pointer to a scratchpad buffer.
+    /// @param params Post-op memory arguments. Must be passed if a binary
+    ///     post-op or scales were set.
+    void execute(const void *A, const void *B,
+            const std::vector<std::pair<memory::dim, memory::dim>> &A_B_offsets,
+            const void *C, void *D, void *scratchpad,
+            const attr_params &params = default_attr_params()) const {
+        // TODO: export batch_element to C API later for user to fill it and
+        // pass directly to the call.
+        dnnl_status_t status = dnnl_brgemm_execute_postops(get(), A, B,
+                (const dnnl_dim_t *)A_B_offsets.data(), C, D, scratchpad,
+                params.get());
+        if (status != dnnl_success)
+            error::wrap_c_api(
+                    status, "could not execute a BRGeMM ukernel object");
+    }
+
+    /// Returns a constant reference to a static instance of default constructed
+    /// primitive post-operations attribute.
+    static const post_ops &default_post_ops() {
+        static const post_ops po;
+        return po;
+    }
+
+    /// Returns a constant reference to a static instance of default constructed
+    /// ukernel attributes parameters.
+    static const attr_params &default_attr_params() {
+        static const attr_params ap;
+        return ap;
+    }
+};
+/// @} dnnl_api_ukernel_brgemm
+
+/// @addtogroup dnnl_api_ukernel_transform Transform ukernel
+/// Transform routines
+/// @{
+
+/// Transform ukernel
+struct transform : public handle<dnnl_transform_t> {
+    /// Default constructor. Produces an empty object.
+    transform() = default;
+
+    /// Constructs a transform object.
+    ///
+    /// @param K Dimension K.
+    /// @param N Dimension N.
+    /// @param in_pack_type Input packing type. Must be one of
+    ///     `pack_type::no_trans`, or `pack_type::trans`.
+    /// @param in_ld Input leading dimension.
+    /// @param out_ld Output leading dimension. Specifies a block by N dimension
+    ///     during data packing.
+    /// @param in_dt Input data type.
+    /// @param out_dt Output data type.
+    /// @param allow_empty A flag signifying whether construction is
+    ///     allowed to fail without throwing an exception. In this case an
+    ///     empty object will be produced. This flag is optional and
+    ///     defaults to false.
+    transform(memory::dim K, memory::dim N, pack_type in_pack_type,
+            memory::dim in_ld, memory::dim out_ld, memory::data_type in_dt,
+            memory::data_type out_dt, bool allow_empty = false) {
+
+        dnnl_transform_t transform = nullptr;
+        dnnl_status_t status = dnnl_transform_create(&transform, K, N,
+                static_cast<dnnl_pack_type_t>(in_pack_type), in_ld, out_ld,
+                memory::convert_to_c(in_dt), memory::convert_to_c(out_dt));
+
+        if (!allow_empty)
+            error::wrap_c_api(status,
+                    "could not create a BRGeMM ukernel packing B object");
+        reset(transform);
+    }
+
+    /// Generates an executable part of transform object.
+    void generate() {
+        dnnl_status_t status = dnnl_transform_generate(get());
+        if (status != dnnl_success)
+            error::wrap_c_api(status,
+                    "could not generate a BRGeMM ukernel packing B object");
+    }
+
+    /// Executes a transform object.
+    ///
+    /// @param in Pointer to an input buffer.
+    /// @param out Pointer to an output buffer.
+    void execute(const void *in, void *out) const {
+        dnnl_status_t status = dnnl_transform_execute(get(), in, out);
+        if (status != dnnl_success)
+            error::wrap_c_api(status,
+                    "could not execute a BRGeMM ukernel packing B object");
+    }
+};
+
+/// @} dnnl_api_ukernel_transform
+
+#endif
+
+} // namespace ukernel
+
+/// @} dnnl_api_ukernel
+
+} // namespace dnnl
+
+/// @} dnnl_api
+
+#endif /* ONEAPI_DNNL_DNNL_UKERNEL_HPP */
diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel_types.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..644e50e2adf727188bed73cc21e797868ae8cd2e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_ukernel_types.h
@@ -0,0 +1,93 @@
+/*******************************************************************************
+* Copyright 2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/// @file
+/// ukernel C API types definitions
+
+#ifndef ONEAPI_DNNL_DNNL_UKERNEL_TYPES_H
+#define ONEAPI_DNNL_DNNL_UKERNEL_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "oneapi/dnnl/dnnl_types.h"
+
+/// @addtogroup dnnl_api
+/// @{
+
+/// @addtogroup dnnl_api_ukernel
+/// @{
+
+#ifdef DNNL_EXPERIMENTAL_UKERNEL
+
+/// Packing specification
+typedef enum {
+    /// Undefined pack type. A guard value.
+    dnnl_pack_type_undef = 0,
+    /// Plain, not transposed layout. Similar to format_tag::ab.
+    dnnl_pack_type_no_trans,
+    /// Plain, transposed layout. Similar to format_tag::ba.
+    dnnl_pack_type_trans,
+    /// Packed by 32 bits along K dimension layout.
+    dnnl_pack_type_pack32,
+} dnnl_pack_type_t;
+
+/// @struct dnnl_ukernel_attr_params
+/// An opaque structure to describe ukernel attributes memory storage.
+struct dnnl_ukernel_attr_params;
+
+/// A ukernel attributes memory storage handle.
+typedef struct dnnl_ukernel_attr_params *dnnl_ukernel_attr_params_t;
+
+/// A constant ukernel attributes memory storage handle.
+typedef const struct dnnl_ukernel_attr_params *const_dnnl_ukernel_attr_params_t; + +/// @addtogroup dnnl_api_ukernel_brgemm +/// @{ + +/// @struct dnnl_brgemm +/// An opaque structure to describe a brgemm ukernel. +struct dnnl_brgemm; + +/// A brgemm ukernel handle. +typedef struct dnnl_brgemm *dnnl_brgemm_t; + +/// A constant brgemm ukernel handle. +typedef const struct dnnl_brgemm *const_dnnl_brgemm_t; + +/// @struct dnnl_transform +/// An opaque structure to describe a transform routine. +struct dnnl_transform; + +/// A transform routine handle. +typedef struct dnnl_transform *dnnl_transform_t; + +/// A constant transform routine handle. +typedef const struct dnnl_transform *const_dnnl_transform_t; + +/// @} dnnl_api_ukernel_brgemm +#endif + +/// @} dnnl_api_ukernel + +/// @} dnnl_api + +#ifdef __cplusplus +} +#endif + +#endif /* ONEAPI_DNNL_DNNL_UKERNEL_TYPES_H */ diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_version.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_version.h new file mode 100644 index 0000000000000000000000000000000000000000..abd7a9d9f041d9e2146273848c53ffe182c9e459 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_version.h @@ -0,0 +1,33 @@ +/******************************************************************************* +* Copyright 2019-2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef ONEAPI_DNNL_DNNL_VERSION_H +#define ONEAPI_DNNL_DNNL_VERSION_H + +// clang-format off + +/// Major version +#define DNNL_VERSION_MAJOR 3 + +/// Minor version +#define DNNL_VERSION_MINOR 7 + +/// Patch version +#define DNNL_VERSION_PATCH 1 + +// clang-format on + +#endif diff --git a/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_version_hash.h b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_version_hash.h new file mode 100644 index 0000000000000000000000000000000000000000..147d397b4343de7deb478410b239f27227c8d73b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/oneapi/dnnl/dnnl_version_hash.h @@ -0,0 +1,31 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/
+
+#ifndef ONEAPI_DNNL_DNNL_VERSION_HASH_H
+#define ONEAPI_DNNL_DNNL_VERSION_HASH_H
+
+// clang-format off
+
+/// Note: this macro and header file were moved to a separate instance to avoid
+/// incremental build issues as moving from commit to commit would trigger a
+/// complete library rebuild. Including a generated header file in a single
+/// translation unit makes this problem go away.
+/// Git commit hash
+#define DNNL_VERSION_HASH "8d263e693366ef8db40acc569cc7d8edf644556d"
+
+// clang-format on
+
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/pybind11/attr.h b/phivenv/Lib/site-packages/torch/include/pybind11/attr.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ea183b39653c32d9e3458be697e270a6cd695be
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/pybind11/attr.h
@@ -0,0 +1,690 @@
+/*
+    pybind11/attr.h: Infrastructure for processing custom
+    type and function attributes
+
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include "detail/common.h"
+#include "cast.h"
+
+#include <functional>
+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+/// \addtogroup annotations
+/// @{
+
+/// Annotation for methods
+struct is_method {
+    handle class_;
+    explicit is_method(const handle &c) : class_(c) {}
+};
+
+/// Annotation for setters
+struct is_setter {};
+
+/// Annotation for operators
+struct is_operator {};
+
+/// Annotation for classes that cannot be subclassed
+struct is_final {};
+
+/// Annotation for parent scope
+struct scope {
+    handle value;
+    explicit scope(const handle &s) : value(s) {}
+};
+
+/// Annotation for documentation
+struct doc {
+    const char *value;
+    explicit doc(const char *value) : value(value) {}
+};
+
+/// Annotation for function names
+struct name {
+    const char *value;
+    explicit name(const char *value) : value(value) {}
+};
+
+/// Annotation indicating that a function is an overload associated with a given "sibling"
+struct sibling {
+    handle value;
+    explicit sibling(const handle &value) : value(value.ptr()) {}
+};
+
+/// Annotation indicating that a class derives from another given type
+template <typename T>
+struct base {
+
+    PYBIND11_DEPRECATED(
+        "base<T>() was deprecated in favor of specifying 'T' as a template argument to class_")
+    base() = default;
+};
+
+/// Keep patient alive while nurse lives
+template <size_t Nurse, size_t Patient>
+struct keep_alive {};
+
+/// Annotation indicating that a class is involved in a multiple inheritance relationship
+struct multiple_inheritance {};
+
+/// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class
+struct dynamic_attr {};
+
+/// Annotation which enables the buffer protocol for a type
+struct buffer_protocol {};
+
+/// Annotation which requests that a special metaclass is created for a type
+struct metaclass {
+    handle value;
+
+    PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.")
+    metaclass() = default;
+
+    /// Override pybind11's default metaclass
+    explicit metaclass(handle value) : value(value) {}
+};
+
+/// Specifies a custom callback with signature `void (PyHeapTypeObject*)` that
+/// may be used to customize the Python type.
+///
+/// The callback is invoked immediately before `PyType_Ready`.
+///
+/// Note: This is an advanced interface, and uses of it may require changes to
+/// work with later versions of pybind11.
You may wish to consult the
+/// implementation of `make_new_python_type` in `detail/classes.h` to understand
+/// the context in which the callback will be run.
+struct custom_type_setup {
+    using callback = std::function<void(PyHeapTypeObject *heap_type)>;
+
+    explicit custom_type_setup(callback value) : value(std::move(value)) {}
+
+    callback value;
+};
+
+/// Annotation that marks a class as local to the module:
+struct module_local {
+    const bool value;
+    constexpr explicit module_local(bool v = true) : value(v) {}
+};
+
+/// Annotation to mark enums as an arithmetic type
+struct arithmetic {};
+
+/// Mark a function for addition at the beginning of the existing overload chain instead of the end
+struct prepend {};
+
+/** \rst
+    A call policy which places one or more guard variables (``Ts...``) around the function call.
+
+    For example, this definition:
+
+    .. code-block:: cpp
+
+        m.def("foo", foo, py::call_guard<T>());
+
+    is equivalent to the following pseudocode:
+
+    .. code-block:: cpp
+
+        m.def("foo", [](args...) {
+            T scope_guard;
+            return foo(args...); // forwarded arguments
+        });
+ \endrst */
+template <typename... Ts>
+struct call_guard;
+
+template <>
+struct call_guard<> {
+    using type = detail::void_type;
+};
+
+template <typename T>
+struct call_guard<T> {
+    static_assert(std::is_default_constructible<T>::value,
+                  "The guard type must be default constructible");
+
+    using type = T;
+};
+
+template <typename T, typename... Ts>
+struct call_guard<T, Ts...> {
+    struct type {
+        T guard{}; // Compose multiple guard types with left-to-right default-constructor order
+        typename call_guard<Ts...>::type next{};
+    };
+};
+
+/// @} annotations
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+/* Forward declarations */
+enum op_id : int;
+enum op_type : int;
+struct undefined_t;
+template <op_id id, op_type ot, typename L, typename R>
+struct op_;
+void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret);
+
+/// Internal data structure which holds metadata about a keyword argument
+struct argument_record {
+    const char *name;  ///< Argument name
+    const char *descr; ///< Human-readable version of the argument value
+    handle value;      ///< Associated Python object
+    bool convert : 1;  ///< True if the argument is allowed to convert when loading
+    bool none : 1;     ///< True if None is allowed when loading
+
+    argument_record(const char *name, const char *descr, handle value, bool convert, bool none)
+        : name(name), descr(descr), value(value), convert(convert), none(none) {}
+};
+
+/// Internal data structure which holds metadata about a bound function (signature, overloads,
+/// etc.)
+struct function_record {
+    function_record()
+        : is_constructor(false), is_new_style_constructor(false), is_stateless(false),
+          is_operator(false), is_method(false), is_setter(false), has_args(false),
+          has_kwargs(false), prepend(false) {}
+
+    /// Function name
+    char *name = nullptr; /* why no C++ strings? They generate heavier code..
*/
+
+    // User-specified documentation string
+    char *doc = nullptr;
+
+    /// Human-readable version of the function signature
+    char *signature = nullptr;
+
+    /// List of registered keyword arguments
+    std::vector<argument_record> args;
+
+    /// Pointer to lambda function which converts arguments and performs the actual call
+    handle (*impl)(function_call &) = nullptr;
+
+    /// Storage for the wrapped function pointer and captured data, if any
+    void *data[3] = {};
+
+    /// Pointer to custom destructor for 'data' (if needed)
+    void (*free_data)(function_record *ptr) = nullptr;
+
+    /// Return value policy associated with this function
+    return_value_policy policy = return_value_policy::automatic;
+
+    /// True if name == '__init__'
+    bool is_constructor : 1;
+
+    /// True if this is a new-style `__init__` defined in `detail/init.h`
+    bool is_new_style_constructor : 1;
+
+    /// True if this is a stateless function pointer
+    bool is_stateless : 1;
+
+    /// True if this is an operator (__add__), etc.
+    bool is_operator : 1;
+
+    /// True if this is a method
+    bool is_method : 1;
+
+    /// True if this is a setter
+    bool is_setter : 1;
+
+    /// True if the function has a '*args' argument
+    bool has_args : 1;
+
+    /// True if the function has a '**kwargs' argument
+    bool has_kwargs : 1;
+
+    /// True if this function is to be inserted at the beginning of the overload resolution chain
+    bool prepend : 1;
+
+    /// Number of arguments (including py::args and/or py::kwargs, if present)
+    std::uint16_t nargs;
+
+    /// Number of leading positional arguments, which are terminated by a py::args or py::kwargs
+    /// argument or by a py::kw_only annotation.
+    std::uint16_t nargs_pos = 0;
+
+    /// Number of leading arguments (counted in `nargs`) that are positional-only
+    std::uint16_t nargs_pos_only = 0;
+
+    /// Python method object
+    PyMethodDef *def = nullptr;
+
+    /// Python handle to the parent scope (a class or a module)
+    handle scope;
+
+    /// Python handle to the sibling function representing an overload chain
+    handle sibling;
+
+    /// Pointer to next overload
+    function_record *next = nullptr;
+};
+
+/// Special data structure which (temporarily) holds metadata about a bound class
+struct type_record {
+    PYBIND11_NOINLINE type_record()
+        : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false),
+          default_holder(true), module_local(false), is_final(false) {}
+
+    /// Handle to the parent scope
+    handle scope;
+
+    /// Name of the class
+    const char *name = nullptr;
+
+    // Pointer to RTTI type_info data structure
+    const std::type_info *type = nullptr;
+
+    /// How large is the underlying C++ type?
+    size_t type_size = 0;
+
+    /// What is the alignment of the underlying C++ type?
+    size_t type_align = 0;
+
+    /// How large is the type's holder?
+    size_t holder_size = 0;
+
+    /// The global operator new can be overridden with a class-specific variant
+    void *(*operator_new)(size_t) = nullptr;
+
+    /// Function pointer to class_<..>::init_instance
+    void (*init_instance)(instance *, const void *) = nullptr;
+
+    /// Function pointer to class_<..>::dealloc
+    void (*dealloc)(detail::value_and_holder &) = nullptr;
+
+    /// List of base classes of the newly created type
+    list bases;
+
+    /// Optional docstring
+    const char *doc = nullptr;
+
+    /// Custom metaclass (optional)
+    handle metaclass;
+
+    /// Custom type setup.
+    custom_type_setup::callback custom_type_setup_callback;
+
+    /// Multiple inheritance marker
+    bool multiple_inheritance : 1;
+
+    /// Does the class manage a __dict__?
+ bool dynamic_attr : 1; + + /// Does the class implement the buffer protocol? + bool buffer_protocol : 1; + + /// Is the default (unique_ptr) holder type used? + bool default_holder : 1; + + /// Is the class definition local to the module shared object? + bool module_local : 1; + + /// Is the class inheritable from python classes? + bool is_final : 1; + + PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *) ) { + auto *base_info = detail::get_type_info(base, false); + if (!base_info) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + + "\" referenced unknown base type \"" + tname + "\""); + } + + if (default_holder != base_info->default_holder) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + "\" " + + (default_holder ? "does not have" : "has") + + " a non-default holder type while its base \"" + tname + "\" " + + (base_info->default_holder ? "does not" : "does")); + } + + bases.append((PyObject *) base_info->type); + +#if PY_VERSION_HEX < 0x030B0000 + dynamic_attr |= base_info->type->tp_dictoffset != 0; +#else + dynamic_attr |= (base_info->type->tp_flags & Py_TPFLAGS_MANAGED_DICT) != 0; +#endif + + if (caster) { + base_info->implicit_casts.emplace_back(type, caster); + } + } +}; + +inline function_call::function_call(const function_record &f, handle p) : func(f), parent(p) { + args.reserve(f.nargs); + args_convert.reserve(f.nargs); +} + +/// Tag for a new-style `__init__` defined in `detail/init.h` +struct is_new_style_constructor {}; + +/** + * Partial template specializations to process custom attributes provided to + * cpp_function_ and class_. These are either used to initialize the respective + * fields in the type_record and function_record data structures or executed at + * runtime to deal with custom call policies (e.g. keep_alive). 
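+ *
+ * For example, a registration such as (hypothetical user code)
+ * `m.def("f", &f, py::keep_alive<1, 2>())` is routed through
+ * `process_attributes<py::keep_alive<1, 2>>`: `init()` runs at binding time,
+ * while the `precall()`/`postcall()` hooks run on every call.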
+ */
+template <typename T, typename SFINAE = void>
+struct process_attribute;
+
+template <typename T>
+struct process_attribute_default {
+    /// Default implementation: do nothing
+    static void init(const T &, function_record *) {}
+    static void init(const T &, type_record *) {}
+    static void precall(function_call &) {}
+    static void postcall(function_call &, handle) {}
+};
+
+/// Process an attribute specifying the function's name
+template <>
+struct process_attribute<name> : process_attribute_default<name> {
+    static void init(const name &n, function_record *r) { r->name = const_cast<char *>(n.value); }
+};
+
+/// Process an attribute specifying the function's docstring
+template <>
+struct process_attribute<doc> : process_attribute_default<doc> {
+    static void init(const doc &n, function_record *r) { r->doc = const_cast<char *>(n.value); }
+};
+
+/// Process an attribute specifying the function's docstring (provided as a C-style string)
+template <>
+struct process_attribute<const char *> : process_attribute_default<const char *> {
+    static void init(const char *d, function_record *r) { r->doc = const_cast<char *>(d); }
+    static void init(const char *d, type_record *r) { r->doc = d; }
+};
+template <>
+struct process_attribute<char *> : process_attribute<const char *> {};
+
+/// Process an attribute indicating the function's return value policy
+template <>
+struct process_attribute<return_value_policy> : process_attribute_default<return_value_policy> {
+    static void init(const return_value_policy &p, function_record *r) { r->policy = p; }
+};
+
+/// Process an attribute which indicates that this is an overloaded function associated with a
+/// given sibling
+template <>
+struct process_attribute<sibling> : process_attribute_default<sibling> {
+    static void init(const sibling &s, function_record *r) { r->sibling = s.value; }
+};
+
+/// Process an attribute which indicates that this function is a method
+template <>
+struct process_attribute<is_method> : process_attribute_default<is_method> {
+    static void init(const is_method &s, function_record *r) {
+        r->is_method = true;
+        r->scope = s.class_;
+    }
+};
+
+/// Process an attribute which indicates that this function is a setter
+template <>
+struct process_attribute<is_setter> : process_attribute_default<is_setter> {
+    static void init(const is_setter &, function_record *r) { r->is_setter = true; }
+};
+
+/// Process an attribute which indicates the parent scope of a method
+template <>
+struct process_attribute<scope> : process_attribute_default<scope> {
+    static void init(const scope &s, function_record *r) { r->scope = s.value; }
+};
+
+/// Process an attribute which indicates that this function is an operator
+template <>
+struct process_attribute<is_operator> : process_attribute_default<is_operator> {
+    static void init(const is_operator &, function_record *r) { r->is_operator = true; }
+};
+
+template <>
+struct process_attribute<is_new_style_constructor>
+    : process_attribute_default<is_new_style_constructor> {
+    static void init(const is_new_style_constructor &, function_record *r) {
+        r->is_new_style_constructor = true;
+    }
+};
+
+inline void check_kw_only_arg(const arg &a, function_record *r) {
+    if (r->args.size() > r->nargs_pos && (!a.name || a.name[0] == '\0')) {
+        pybind11_fail("arg(): cannot specify an unnamed argument after a kw_only() annotation or "
+                      "args() argument");
+    }
+}
+
+inline void append_self_arg_if_needed(function_record *r) {
+    if (r->is_method && r->args.empty()) {
+        r->args.emplace_back("self", nullptr, handle(), /*convert=*/true, /*none=*/false);
+    }
+}
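+// For instance, in a hypothetical binding `cls.def("get", &C::get, py::arg("i"))`,
+// append_self_arg_if_needed() first records the implicit "self" parameter, so the
+// user-supplied py::arg("i") processed below describes the second positional argument.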
+/// Process a keyword argument attribute (*without* a default value)
+template <>
+struct process_attribute<arg> : process_attribute_default<arg> {
+    static void init(const arg &a, function_record *r) {
+        append_self_arg_if_needed(r);
+        r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none);
+
+        check_kw_only_arg(a, r);
+    }
+};
+
+/// Process a keyword argument attribute (*with* a default value)
+template <>
+struct process_attribute<arg_v> : process_attribute_default<arg_v> {
+    static void init(const arg_v &a, function_record *r) {
+        if (r->is_method && r->args.empty()) {
+            r->args.emplace_back(
+                "self", /*descr=*/nullptr, /*parent=*/handle(), /*convert=*/true, /*none=*/false);
+        }
+
+        if (!a.value) {
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+            std::string descr("'");
+            if (a.name) {
+                descr += std::string(a.name) + ": ";
+            }
+            descr += a.type + "'";
+            if (r->is_method) {
+                if (r->name) {
+                    descr += " in method '" + (std::string) str(r->scope) + "."
+                             + (std::string) r->name + "'";
+                } else {
+                    descr += " in method of '" + (std::string) str(r->scope) + "'";
+                }
+            } else if (r->name) {
+                descr += " in function '" + (std::string) r->name + "'";
+            }
+            pybind11_fail("arg(): could not convert default argument " + descr
+                          + " into a Python object (type not registered yet?)");
+#else
+            pybind11_fail("arg(): could not convert default argument "
+                          "into a Python object (type not registered yet?). "
+                          "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for "
+                          "more information.");
+#endif
+        }
+        r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none);
+
+        check_kw_only_arg(a, r);
+    }
+};
+
+/// Process a keyword-only-arguments-follow pseudo argument
+template <>
+struct process_attribute<kw_only> : process_attribute_default<kw_only> {
+    static void init(const kw_only &, function_record *r) {
+        append_self_arg_if_needed(r);
+        if (r->has_args && r->nargs_pos != static_cast<std::uint16_t>(r->args.size())) {
+            pybind11_fail("Mismatched args() and kw_only(): they must occur at the same relative "
+                          "argument location (or omit kw_only() entirely)");
+        }
+        r->nargs_pos = static_cast<std::uint16_t>(r->args.size());
+    }
+};
+
+/// Process a positional-only-argument maker
+template <>
+struct process_attribute<pos_only> : process_attribute_default<pos_only> {
+    static void init(const pos_only &, function_record *r) {
+        append_self_arg_if_needed(r);
+        r->nargs_pos_only = static_cast<std::uint16_t>(r->args.size());
+        if (r->nargs_pos_only > r->nargs_pos) {
+            pybind11_fail("pos_only(): cannot follow a py::args() argument");
+        }
+        // It also can't follow a kw_only, but a static_assert in pybind11.h checks that
+    }
+};
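+// Worked example (hypothetical signature): for
+//     m.def("f", &f, py::arg("a"), py::pos_only(), py::arg("b"), py::kw_only(), py::arg("c"));
+// the attribute handlers above leave nargs_pos_only == 1 (only `a` is
+// positional-only) and kw_only() resets nargs_pos to 2 (`a` and `b` may still
+// be passed positionally, `c` is keyword-only).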
+/// Process a parent class attribute.  Single inheritance only (class_ itself already guarantees
+/// that)
+template <typename T>
+struct process_attribute<T, enable_if_t<is_pyobject<T>::value>>
+    : process_attribute_default<handle> {
+    static void init(const handle &h, type_record *r) { r->bases.append(h); }
+};
+
+/// Process a parent class attribute (deprecated, does not support multiple inheritance)
+template <typename T>
+struct process_attribute<base<T>> : process_attribute_default<base<T>> {
+    static void init(const base<T> &, type_record *r) { r->add_base(typeid(T), nullptr); }
+};
+
+/// Process a multiple inheritance attribute
+template <>
+struct process_attribute<multiple_inheritance> : process_attribute_default<multiple_inheritance> {
+    static void init(const multiple_inheritance &, type_record *r) {
+        r->multiple_inheritance = true;
+    }
+};
+
+template <>
+struct process_attribute<dynamic_attr> : process_attribute_default<dynamic_attr> {
+    static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; }
+};
+
+template <>
+struct process_attribute<custom_type_setup> {
+    static void init(const custom_type_setup &value, type_record *r) {
+        r->custom_type_setup_callback = value.value;
+    }
+};
+
+template <>
+struct process_attribute<is_final> : process_attribute_default<is_final> {
+    static void init(const is_final &, type_record *r) { r->is_final = true; }
+};
+
+template <>
+struct process_attribute<buffer_protocol> : process_attribute_default<buffer_protocol> {
+    static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; }
+};
+
+template <>
+struct process_attribute<metaclass> : process_attribute_default<metaclass> {
+    static void init(const metaclass &m, type_record *r) { r->metaclass = m.value; }
+};
+
+template <>
+struct process_attribute<module_local> : process_attribute_default<module_local> {
+    static void init(const module_local &l, type_record *r) { r->module_local = l.value; }
+};
+
+/// Process a 'prepend' attribute, putting this at the beginning of the overload chain
+template <>
+struct process_attribute<prepend> : process_attribute_default<prepend> {
+    static void init(const prepend &, function_record *r) { r->prepend = true; }
+};
+
+/// Process an 'arithmetic' attribute for enums (does nothing here)
+template <>
+struct process_attribute<arithmetic> : process_attribute_default<arithmetic> {};
+
+template <typename... Ts>
+struct process_attribute<call_guard<Ts...>> : process_attribute_default<call_guard<Ts...>> {};
+
+/**
+ * Process a keep_alive call policy -- invokes keep_alive_impl during the
+ * pre-call handler if both Nurse, Patient != 0 and uses the post-call handler
+ * otherwise
+ */
+template <size_t Nurse, size_t Patient>
+struct process_attribute<keep_alive<Nurse, Patient>>
+    : public process_attribute_default<keep_alive<Nurse, Patient>> {
+    template <size_t N = Nurse, size_t P = Patient, enable_if_t<N != 0 && P != 0, int> = 0>
+    static void precall(function_call &call) {
+        keep_alive_impl(Nurse, Patient, call, handle());
+    }
+    template <size_t N = Nurse, size_t P = Patient, enable_if_t<N != 0 && P != 0, int> = 0>
+    static void postcall(function_call &, handle) {}
+    template <size_t N = Nurse, size_t P = Patient, enable_if_t<N == 0 || P == 0, int> = 0>
+    static void precall(function_call &) {}
+    template <size_t N = Nurse, size_t P = Patient, enable_if_t<N == 0 || P == 0, int> = 0>
+    static void postcall(function_call &call, handle ret) {
+        keep_alive_impl(Nurse, Patient, call, ret);
+    }
+};
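+// Behavior sketch: keep_alive<1, 2> keeps call argument 2 alive at least as long
+// as argument 1 (indices are 1-based; index 0 denotes the return value). When
+// either index is 0 the patient is tied to the return value, so the work happens
+// in postcall() rather than precall(), as selected by the enable_if guards above.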
+/// Recursively iterate over variadic template arguments
+template <typename... Args>
+struct process_attributes {
+    static void init(const Args &...args, function_record *r) {
+        PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r);
+        PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r);
+        using expander = int[];
+        (void) expander{
+            0, ((void) process_attribute<typename std::decay<Args>::type>::init(args, r), 0)...};
+    }
+    static void init(const Args &...args, type_record *r) {
+        PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r);
+        PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r);
+        using expander = int[];
+        (void) expander{0,
+                        (process_attribute<typename std::decay<Args>::type>::init(args, r), 0)...};
+    }
+    static void precall(function_call &call) {
+        PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(call);
+        using expander = int[];
+        (void) expander{0,
+                        (process_attribute<typename std::decay<Args>::type>::precall(call), 0)...};
+    }
+    static void postcall(function_call &call, handle fn_ret) {
+        PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(call, fn_ret);
+        PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(fn_ret);
+        using expander = int[];
+        (void) expander{
+            0, (process_attribute<typename std::decay<Args>::type>::postcall(call, fn_ret), 0)...};
+    }
+};
+
+template <typename T>
+using is_call_guard = is_instantiation<call_guard, T>;
+
+/// Extract the ``type`` from the first `call_guard` in `Extras...` (or `void_type` if none found)
+template <typename... Extra>
+using extract_guard_t = typename exactly_one_t<is_call_guard, call_guard<>, Extra...>::type;
+
+/// Check the number of named arguments at compile time
+template <typename... Extra,
+          size_t named = constexpr_sum(std::is_base_of<arg, Extra>::value...),
+          size_t self = constexpr_sum(std::is_same<is_method, Extra>::value...)>
+constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) {
+    PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(nargs, has_args, has_kwargs);
+    return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs;
+}
+
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/phivenv/Lib/site-packages/torch/include/pybind11/buffer_info.h b/phivenv/Lib/site-packages/torch/include/pybind11/buffer_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..c44438ffa079fd8900438adaac9ed0abaf4d39ac
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/pybind11/buffer_info.h
@@ -0,0 +1,208 @@
+/*
+    pybind11/buffer_info.h: Python buffer object interface
+
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include "detail/common.h"
+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+// Default, C-style strides
+inline std::vector<ssize_t> c_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
+    auto ndim = shape.size();
+    std::vector<ssize_t> strides(ndim, itemsize);
+    if (ndim > 0) {
+        for (size_t i = ndim - 1; i > 0; --i) {
+            strides[i - 1] = strides[i] * shape[i];
+        }
+    }
+    return strides;
+}
+
+// F-style strides; default when constructing an array_t with `ExtraFlags & f_style`
+inline std::vector<ssize_t> f_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
+    auto ndim = shape.size();
+    std::vector<ssize_t> strides(ndim, itemsize);
+    for (size_t i = 1; i < ndim; ++i) {
+        strides[i] = strides[i - 1] * shape[i - 1];
+    }
+    return strides;
+}
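+// Sanity example: for a hypothetical shape {2, 3, 4} with itemsize 8,
+// c_strides(...) yields {96, 32, 8} (row-major) and f_strides(...) yields
+// {8, 16, 48} (column-major).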
+template <typename T, typename SFINAE = void>
+struct compare_buffer_info;
+
+PYBIND11_NAMESPACE_END(detail)
+
+/// Information record describing a Python buffer object
+struct buffer_info {
+    void *ptr = nullptr;          // Pointer to the underlying storage
+    ssize_t itemsize = 0;         // Size of individual items in bytes
+    ssize_t size = 0;             // Total number of entries
+    std::string format;           // For homogeneous buffers, this should be set to
+                                  // format_descriptor<T>::format()
+    ssize_t ndim = 0;             // Number of dimensions
+    std::vector<ssize_t> shape;   // Shape of the tensor (1 entry per dimension)
+    std::vector<ssize_t> strides; // Number of bytes between adjacent entries
+                                  // (for each per dimension)
+    bool readonly = false;        // flag to indicate if the underlying storage may be written to
+
+    buffer_info() = default;
+
+    buffer_info(void *ptr,
+                ssize_t itemsize,
+                const std::string &format,
+                ssize_t ndim,
+                detail::any_container<ssize_t> shape_in,
+                detail::any_container<ssize_t> strides_in,
+                bool readonly = false)
+        : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
+          shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) {
+        if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) {
+            pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
+        }
+        for (size_t i = 0; i < (size_t) ndim; ++i) {
+            size *= shape[i];
+        }
+    }
+
+    template <typename T>
+    buffer_info(T *ptr,
+                detail::any_container<ssize_t> shape_in,
+                detail::any_container<ssize_t> strides_in,
+                bool readonly = false)
+        : buffer_info(private_ctr_tag(),
+                      ptr,
+                      sizeof(T),
+                      format_descriptor<T>::format(),
+                      static_cast<ssize_t>(shape_in->size()),
+                      std::move(shape_in),
+                      std::move(strides_in),
+                      readonly) {}
+
+    buffer_info(void *ptr,
+                ssize_t itemsize,
+                const std::string &format,
+                ssize_t size,
+                bool readonly = false)
+        : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) {}
+
+    template <typename T>
+    buffer_info(T *ptr, ssize_t size, bool readonly = false)
+        : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size, readonly) {}
+
+    template <typename T>
+    buffer_info(const T *ptr, ssize_t size, bool readonly = true)
+        : buffer_info(
+              const_cast<T *>(ptr), sizeof(T), format_descriptor<T>::format(), size, readonly) {}
+
+    explicit buffer_info(Py_buffer *view, bool ownview = true)
+        : buffer_info(
+              view->buf,
+              view->itemsize,
+              view->format,
+              view->ndim,
+              {view->shape, view->shape + view->ndim},
+              /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects
+               * ignore this flag and return a view with NULL strides.
+               * When strides are NULL, build them manually. */
+              view->strides
+                  ? std::vector<ssize_t>(view->strides, view->strides + view->ndim)
+                  : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize),
+              (view->readonly != 0)) {
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        this->m_view = view;
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        this->ownview = ownview;
+    }
+
+    buffer_info(const buffer_info &) = delete;
+    buffer_info &operator=(const buffer_info &) = delete;
+
+    buffer_info(buffer_info &&other) noexcept { (*this) = std::move(other); }
+
+    buffer_info &operator=(buffer_info &&rhs) noexcept {
+        ptr = rhs.ptr;
+        itemsize = rhs.itemsize;
+        size = rhs.size;
+        format = std::move(rhs.format);
+        ndim = rhs.ndim;
+        shape = std::move(rhs.shape);
+        strides = std::move(rhs.strides);
+        std::swap(m_view, rhs.m_view);
+        std::swap(ownview, rhs.ownview);
+        readonly = rhs.readonly;
+        return *this;
+    }
+
+    ~buffer_info() {
+        if (m_view && ownview) {
+            PyBuffer_Release(m_view);
+            delete m_view;
+        }
+    }
+
+    Py_buffer *view() const { return m_view; }
+    Py_buffer *&view() { return m_view; }
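+    // Typical use from a binding (hypothetical caller code, given a py::buffer b):
+    //     py::buffer_info info = b.request();
+    //     if (info.item_type_is_equivalent_to<double>()) { /* read info.ptr as double */ }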
+    /* True if the buffer item type is equivalent to `T`. */
+    // To define "equivalent" by example:
+    // `buffer_info::item_type_is_equivalent_to<int>(b)` and
+    // `buffer_info::item_type_is_equivalent_to<long>(b)` may both be true
+    // on some platforms, but `int` and `unsigned` will never be equivalent.
+    // For the ground truth, please inspect `detail::compare_buffer_info<>`.
+    template <typename T>
+    bool item_type_is_equivalent_to() const {
+        return detail::compare_buffer_info<T>::compare(*this);
+    }
+
+private:
+    struct private_ctr_tag {};
+
+    buffer_info(private_ctr_tag,
+                void *ptr,
+                ssize_t itemsize,
+                const std::string &format,
+                ssize_t ndim,
+                detail::any_container<ssize_t> &&shape_in,
+                detail::any_container<ssize_t> &&strides_in,
+                bool readonly)
+        : buffer_info(
+              ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) {}
+
+    Py_buffer *m_view = nullptr;
+    bool ownview = false;
+};
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+template <typename T, typename SFINAE>
+struct compare_buffer_info {
+    static bool compare(const buffer_info &b) {
+        // NOLINTNEXTLINE(bugprone-sizeof-expression) Needed for `PyObject *`
+        return b.format == format_descriptor<T>::format() && b.itemsize == (ssize_t) sizeof(T);
+    }
+};
+
+template <typename T>
+struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> {
+    static bool compare(const buffer_info &b) {
+        return (size_t) b.itemsize == sizeof(T)
+               && (b.format == format_descriptor<T>::value
+                   || ((sizeof(T) == sizeof(long))
+                       && b.format == (std::is_unsigned<T>::value ? "L" : "l"))
+                   || ((sizeof(T) == sizeof(size_t))
+                       && b.format == (std::is_unsigned<T>::value ? "N" : "n")));
+    }
+};
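+// e.g. on an LP64 platform (hypothetical buffer), format "l" with itemsize 8 also
+// satisfies compare_buffer_info<long long>::compare, because the fallback above
+// accepts the "l"/"L" codes whenever sizeof(T) == sizeof(long).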
+
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/phivenv/Lib/site-packages/torch/include/pybind11/cast.h b/phivenv/Lib/site-packages/torch/include/pybind11/cast.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a32f537ceca3212dfc4adba65afd3a57881efa3
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/pybind11/cast.h
@@ -0,0 +1,1855 @@
+/*
+    pybind11/cast.h: Partial template specializations to cast between
+    C++ and Python types
+
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include "detail/common.h"
+#include "detail/descr.h"
+#include "detail/type_caster_base.h"
+#include "detail/typeid.h"
+#include "pytypes.h"
+
+#include <array>
+#include <cstring>
+#include <functional>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+template <typename type, typename SFINAE = void>
+class type_caster : public type_caster_base<type> {};
+template <typename type>
+using make_caster = type_caster<intrinsic_t<type>>;
+
+// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T
+template <typename T>
+typename make_caster<T>::template cast_op_type<T> cast_op(make_caster<T> &caster) {
+    using result_t = typename make_caster<T>::template cast_op_type<T>; // See PR #4893
+    return caster.operator result_t();
+}
+template <typename T>
+typename make_caster<T>::template cast_op_type<typename std::add_rvalue_reference<T>::type>
+cast_op(make_caster<T> &&caster) {
+    using result_t = typename make_caster<T>::template cast_op_type<
+        typename std::add_rvalue_reference<T>::type>; // See PR #4893
+    return std::move(caster).operator result_t();
+}
+
+template <typename type>
+class type_caster<std::reference_wrapper<type>> {
+private:
+    using caster_t = make_caster<type>;
+    caster_t subcaster;
+    using reference_t = type &;
+    using subcaster_cast_op_type = typename caster_t::template cast_op_type<reference_t>;
+
+    static_assert(
+        std::is_same<typename std::remove_const<type>::type &, subcaster_cast_op_type>::value
+            || std::is_same<reference_t, subcaster_cast_op_type>::value,
+        "std::reference_wrapper<T> caster requires T to have a caster with an "
+        "`operator T &()` or `operator const T &()`");
+
+public:
+    bool load(handle src, bool convert) { return subcaster.load(src, convert); }
+    static constexpr auto name = caster_t::name;
+    static handle
+    cast(const std::reference_wrapper<type> &src, return_value_policy policy, handle parent) {
+        // It is definitely wrong to take ownership of this pointer, so mask that rvp
+        if (policy == return_value_policy::take_ownership
+            || policy == return_value_policy::automatic) {
+            policy = return_value_policy::automatic_reference;
+        }
+        return caster_t::cast(&src.get(), policy, parent);
+    }
+    template <typename T>
+    using cast_op_type = std::reference_wrapper<type>;
+    explicit operator std::reference_wrapper<type>() { return cast_op<type &>(subcaster); }
+};
+
+#define PYBIND11_TYPE_CASTER(type, py_name)                                                       \
+protected:                                                                                        \
+    type value;                                                                                   \
+                                                                                                  \
+public:                                                                                           \
+    static constexpr auto name = py_name;                                                         \
+    template <typename T_,                                                                        \
+              ::pybind11::detail::enable_if_t<                                                    \
+                  std::is_same<type, ::pybind11::detail::remove_cv_t<T_>>::value,                 \
+                  int>                                                                            \
+              = 0>                                                                                \
+    static ::pybind11::handle cast(                                                               \
+        T_ *src, ::pybind11::return_value_policy policy, ::pybind11::handle parent) {             \
+        if (!src)                                                                                 \
+            return ::pybind11::none().release();                                                  \
+        if (policy == ::pybind11::return_value_policy::take_ownership) {                          \
+            auto h = cast(std::move(*src), policy, parent);                                       \
+            delete src;                                                                           \
+            return h;                                                                             \
+        }                                                                                         \
+        return cast(*src, policy, parent);                                                        \
+    }                                                                                             \
+    operator type *() { return &value; }               /* NOLINT(bugprone-macro-parentheses) */   \
+    operator type &() { return value; }                /* NOLINT(bugprone-macro-parentheses) */   \
+    operator type &&() && { return std::move(value); } /* NOLINT(bugprone-macro-parentheses) */   \
+    template <typename T_>                                                                        \
+    using cast_op_type = ::pybind11::detail::movable_cast_op_type<T_>
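+// A minimal sketch of a user-defined caster built on this macro (the type
+// `Point2D` and its 2-tuple representation are hypothetical, not part of this
+// header):
+//
+//     namespace pybind11 { namespace detail {
+//     template <>
+//     struct type_caster<Point2D> {
+//         PYBIND11_TYPE_CASTER(Point2D, const_name("Point2D"));
+//         bool load(handle src, bool /*convert*/) {
+//             if (!isinstance<tuple>(src)) return false;
+//             auto t = reinterpret_borrow<tuple>(src);
+//             if (t.size() != 2) return false;
+//             value.x = t[0].cast<double>();
+//             value.y = t[1].cast<double>();
+//             return true;
+//         }
+//         static handle cast(const Point2D &p, return_value_policy, handle) {
+//             return make_tuple(p.x, p.y).release();
+//         }
+//     };
+//     }} // namespace pybind11::detail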
+
+template <typename CharT>
+using is_std_char_type = any_of<std::is_same<CharT, char>, /* std::string */
+#if defined(PYBIND11_HAS_U8STRING)
+                                std::is_same<CharT, char8_t>, /* std::u8string */
+#endif
+                                std::is_same<CharT, char16_t>, /* std::u16string */
+                                std::is_same<CharT, char32_t>, /* std::u32string */
+                                std::is_same<CharT, wchar_t> /* std::wstring */
+                                >;
+
+template <typename T>
+struct type_caster<T, enable_if_t<std::is_arithmetic<T>::value && !is_std_char_type<T>::value>> {
+    using _py_type_0 = conditional_t<sizeof(T) <= sizeof(long), long, long long>;
+    using _py_type_1 = conditional_t<std::is_signed<T>::value,
+                                     _py_type_0,
+                                     typename std::make_unsigned<_py_type_0>::type>;
+    using py_type = conditional_t<std::is_floating_point<T>::value, double, _py_type_1>;
+
+public:
+    bool load(handle src, bool convert) {
+        py_type py_value;
+
+        if (!src) {
+            return false;
+        }
+
+#if !defined(PYPY_VERSION)
+        auto index_check = [](PyObject *o) { return PyIndex_Check(o); };
+#else
+        // In PyPy 7.3.3, `PyIndex_Check` is implemented by calling `__index__`,
+        // while CPython only considers the existence of `nb_index`/`__index__`.
+        auto index_check = [](PyObject *o) { return hasattr(o, "__index__"); };
+#endif
+
+        if (std::is_floating_point<T>::value) {
+            if (convert || PyFloat_Check(src.ptr())) {
+                py_value = (py_type) PyFloat_AsDouble(src.ptr());
+            } else {
+                return false;
+            }
+        } else if (PyFloat_Check(src.ptr())
+                   || (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr()))) {
+            return false;
+        } else {
+            handle src_or_index = src;
+            // PyPy: 7.3.7's 3.8 does not implement PyLong_*'s __index__ calls.
+#if PY_VERSION_HEX < 0x03080000 || defined(PYPY_VERSION)
+            object index;
+            if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr())
+                index = reinterpret_steal<object>(PyNumber_Index(src.ptr()));
+                if (!index) {
+                    PyErr_Clear();
+                    if (!convert)
+                        return false;
+                } else {
+                    src_or_index = index;
+                }
+            }
+#endif
+            if (std::is_unsigned<py_type>::value) {
+                py_value = as_unsigned<py_type>(src_or_index.ptr());
+            } else { // signed integer:
+                py_value = sizeof(T) <= sizeof(long)
+                               ? (py_type) PyLong_AsLong(src_or_index.ptr())
+                               : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr());
+            }
+        }
+
+        // Python API reported an error
+        bool py_err = py_value == (py_type) -1 && PyErr_Occurred();
+
+        // Check to see if the conversion is valid (integers should match exactly)
+        // Signed/unsigned checks happen elsewhere
+        if (py_err
+            || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T)
+                && py_value != (py_type) (T) py_value)) {
+            PyErr_Clear();
+            if (py_err && convert && (PyNumber_Check(src.ptr()) != 0)) {
+                auto tmp = reinterpret_steal<object>(std::is_floating_point<T>::value
+                                                         ? PyNumber_Float(src.ptr())
+                                                         : PyNumber_Long(src.ptr()));
+                PyErr_Clear();
+                return load(tmp, false);
+            }
+            return false;
+        }
+
+        value = (T) py_value;
+        return true;
+    }
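+// Conversion-semantics note for the caster above: a Python float never loads
+// into a C++ integer parameter (the PyFloat_Check rejection in load()), while a
+// Python int loads into a C++ floating-point parameter only when convert is
+// true, i.e. not under py::arg().noconvert().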
+
+    template <typename U = T>
+    static typename std::enable_if<std::is_floating_point<U>::value, handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PyFloat_FromDouble((double) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value
+                                       && (sizeof(U) <= sizeof(long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PYBIND11_LONG_FROM_SIGNED((long) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value
+                                       && (sizeof(U) <= sizeof(unsigned long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value
+                                       && (sizeof(U) > sizeof(long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PyLong_FromLongLong((long long) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value
+                                       && (sizeof(U) > sizeof(unsigned long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PyLong_FromUnsignedLongLong((unsigned long long) src);
+    }
+
+    PYBIND11_TYPE_CASTER(T, const_name<std::is_integral<T>::value>("int", "float"));
+};
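+// Note on the bool caster defined below: in convert mode any object with a
+// usable __bool__/nb_bool slot is accepted (e.g. a hypothetical NumPy bool
+// scalar), and None converts to False; without convert, only exact
+// Py_True/Py_False or NumPy booleans load successfully.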
+
+template <typename T>
+struct void_caster {
+public:
+    bool load(handle src, bool) {
+        if (src && src.is_none()) {
+            return true;
+        }
+        return false;
+    }
+    static handle cast(T, return_value_policy /* policy */, handle /* parent */) {
+        return none().release();
+    }
+    PYBIND11_TYPE_CASTER(T, const_name("None"));
+};
+
+template <>
+class type_caster<void_type> : public void_caster<void_type> {};
+
+template <>
+class type_caster<void> : public type_caster<void_type> {
+public:
+    using type_caster<void_type>::cast;
+
+    bool load(handle h, bool) {
+        if (!h) {
+            return false;
+        }
+        if (h.is_none()) {
+            value = nullptr;
+            return true;
+        }
+
+        /* Check if this is a capsule */
+        if (isinstance<capsule>(h)) {
+            value = reinterpret_borrow<capsule>(h);
+            return true;
+        }
+
+        /* Check if this is a C++ type */
+        const auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr());
+        if (bases.size() == 1) { // Only allowing loading from a single-value type
+            value = values_and_holders(reinterpret_cast<instance *>(h.ptr())).begin()->value_ptr();
+            return true;
+        }
+
+        /* Fail */
+        return false;
+    }
+
+    static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) {
+        if (ptr) {
+            return capsule(ptr).release();
+        }
+        return none().release();
+    }
+
+    template <typename T>
+    using cast_op_type = void *&;
+    explicit operator void *&() { return value; }
+    static constexpr auto name = const_name("capsule");
+
+private:
+    void *value = nullptr;
+};
+
+template <>
+class type_caster<std::nullptr_t> : public void_caster<std::nullptr_t> {};
+
+template <>
+class type_caster<bool> {
+public:
+    bool load(handle src, bool convert) {
+        if (!src) {
+            return false;
+        }
+        if (src.ptr() == Py_True) {
+            value = true;
+            return true;
+        }
+        if (src.ptr() == Py_False) {
+            value = false;
+            return true;
+        }
+        if (convert || is_numpy_bool(src)) {
+            // (allow non-implicit conversion for numpy booleans; is_numpy_bool()
+            // below matches both spellings, since the name lost its trailing
+            // underscore in NumPy 2)
+
+            Py_ssize_t res = -1;
+            if (src.is_none()) {
+                res = 0; // None is implicitly converted to False
+            }
+#if defined(PYPY_VERSION)
+            // On PyPy, check that "__bool__" attr exists
+            else if (hasattr(src, PYBIND11_BOOL_ATTR)) {
+                res = PyObject_IsTrue(src.ptr());
+            }
+#else
+            // Alternate approach for CPython: this does the same as the above, but optimized
+            // using the CPython API so as to avoid an unneeded attribute lookup.
+            else if (auto *tp_as_number = src.ptr()->ob_type->tp_as_number) {
+                if (PYBIND11_NB_BOOL(tp_as_number)) {
+                    res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr());
+                }
+            }
+#endif
+            if (res == 0 || res == 1) {
+                value = (res != 0);
+                return true;
+            }
+            PyErr_Clear();
+        }
+        return false;
+    }
+    static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) {
+        return handle(src ? Py_True : Py_False).inc_ref();
+    }
+    PYBIND11_TYPE_CASTER(bool, const_name("bool"));
+
+private:
+    // Test if an object is a NumPy boolean (without fetching the type).
+    static inline bool is_numpy_bool(handle object) {
+        const char *type_name = Py_TYPE(object.ptr())->tp_name;
+        // Name changed to `numpy.bool` in NumPy 2, `numpy.bool_` is needed for 1.x support
+        return std::strcmp("numpy.bool", type_name) == 0
+               || std::strcmp("numpy.bool_", type_name) == 0;
+    }
+};
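+// Encoding behavior sketch for the string_caster below: a C++ std::u16string
+// result is turned into a Python str via PyUnicode_DecodeUTF16, while a Python
+// bytes or bytearray argument is accepted verbatim into a std::string through
+// load_raw(), with no decoding attempt.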
"utf-16" + : "utf-32", + nullptr)); + if (!utfNbytes) { + PyErr_Clear(); + return false; + } + + const auto *buffer + = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); + size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT); + // Skip BOM for UTF-16/32 + if (UTF_N > 8) { + buffer++; + length--; + } + value = StringType(buffer, length); + + // If we're loading a string_view we need to keep the encoded Python object alive: + if (IsView) { + loader_life_support::add_patient(utfNbytes); + } + + return true; + } + + static handle + cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { + const char *buffer = reinterpret_cast(src.data()); + auto nbytes = ssize_t(src.size() * sizeof(CharT)); + handle s = decode_utfN(buffer, nbytes); + if (!s) { + throw error_already_set(); + } + return s; + } + + PYBIND11_TYPE_CASTER(StringType, const_name(PYBIND11_STRING_NAME)); + +private: + static handle decode_utfN(const char *buffer, ssize_t nbytes) { +#if !defined(PYPY_VERSION) + return UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) + : UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) + : PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); +#else + // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as + // well), so bypass the whole thing by just passing the encoding as a string value, which + // works properly: + return PyUnicode_Decode(buffer, + nbytes, + UTF_N == 8 ? "utf-8" + : UTF_N == 16 ? "utf-16" + : "utf-32", + nullptr); +#endif + } + + // When loading into a std::string or char*, accept a bytes/bytearray object as-is (i.e. + // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. + // which supports loading a unicode from a str, doesn't take this path. + template + bool load_raw(enable_if_t::value, handle> src) { + if (PYBIND11_BYTES_CHECK(src.ptr())) { + // We were passed raw bytes; accept it into a std::string or char* + // without any encoding attempt. + const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr()); + if (!bytes) { + pybind11_fail("Unexpected PYBIND11_BYTES_AS_STRING() failure."); + } + value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); + return true; + } + if (PyByteArray_Check(src.ptr())) { + // We were passed a bytearray; accept it into a std::string or char* + // without any encoding attempt. + const char *bytearray = PyByteArray_AsString(src.ptr()); + if (!bytearray) { + pybind11_fail("Unexpected PyByteArray_AsString() failure."); + } + value = StringType(bytearray, (size_t) PyByteArray_Size(src.ptr())); + return true; + } + + return false; + } + + template + bool load_raw(enable_if_t::value, handle>) { + return false; + } +}; + +template +struct type_caster, + enable_if_t::value>> + : string_caster> {}; + +#ifdef PYBIND11_HAS_STRING_VIEW +template +struct type_caster, + enable_if_t::value>> + : string_caster, true> {}; +#endif + +// Type caster for C-style strings. We basically use a std::string type caster, but also add the +// ability to use None as a nullptr char* (which the string caster doesn't allow). 
+
+template <typename CharT, class Traits, class Allocator>
+struct type_caster<std::basic_string<CharT, Traits, Allocator>,
+                   enable_if_t<is_std_char_type<CharT>::value>>
+    : string_caster<std::basic_string<CharT, Traits, Allocator>> {};
+
+#ifdef PYBIND11_HAS_STRING_VIEW
+template <typename CharT, class Traits>
+struct type_caster<std::basic_string_view<CharT, Traits>,
+                   enable_if_t<is_std_char_type<CharT>::value>>
+    : string_caster<std::basic_string_view<CharT, Traits>, true> {};
+#endif
+
+// Type caster for C-style strings. We basically use a std::string type caster, but also add the
+// ability to use None as a nullptr char* (which the string caster doesn't allow).
+template <typename CharT>
+struct type_caster<CharT, enable_if_t<is_std_char_type<CharT>::value>> {
+    using StringType = std::basic_string<CharT>;
+    using StringCaster = make_caster<StringType>;
+    StringCaster str_caster;
+    bool none = false;
+    CharT one_char = 0;
+
+public:
+    bool load(handle src, bool convert) {
+        if (!src) {
+            return false;
+        }
+        if (src.is_none()) {
+            // Defer accepting None to other overloads (if we aren't in convert mode):
+            if (!convert) {
+                return false;
+            }
+            none = true;
+            return true;
+        }
+        return str_caster.load(src, convert);
+    }
+
+    static handle cast(const CharT *src, return_value_policy policy, handle parent) {
+        if (src == nullptr) {
+            return pybind11::none().release();
+        }
+        return StringCaster::cast(StringType(src), policy, parent);
+    }
+
+    static handle cast(CharT src, return_value_policy policy, handle parent) {
+        if (std::is_same<char, CharT>::value) {
+            handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr);
+            if (!s) {
+                throw error_already_set();
+            }
+            return s;
+        }
+        return StringCaster::cast(StringType(1, src), policy, parent);
+    }
+
+    explicit operator CharT *() {
+        return none ? nullptr : const_cast<CharT *>(static_cast<StringType &>(str_caster).c_str());
+    }
+    explicit operator CharT &() {
+        if (none) {
+            throw value_error("Cannot convert None to a character");
+        }
+
+        auto &value = static_cast<StringType &>(str_caster);
+        size_t str_len = value.size();
+        if (str_len == 0) {
+            throw value_error("Cannot convert empty string to a character");
+        }
+
+        // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that
+        // is too high, and one for multiple unicode characters (caught later), so we need to
+        // figure out how long the first encoded character is in bytes to distinguish between these
+        // two errors. We also want to allow unicode characters U+0080 through U+00FF, as
+        // those can fit into a single char value.
+        if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) {
+            auto v0 = static_cast<unsigned char>(value[0]);
+            // low bits only: 0-127
+            // 0b110xxxxx - start of 2-byte sequence
+            // 0b1110xxxx - start of 3-byte sequence
+            // 0b11110xxx - start of 4-byte sequence
+            size_t char0_bytes = (v0 & 0x80) == 0      ? 1
+                                 : (v0 & 0xE0) == 0xC0 ? 2
+                                 : (v0 & 0xF0) == 0xE0 ? 3
+                                                       : 4;
+
+            if (char0_bytes == str_len) {
+                // If we have a 128-255 value, we can decode it into a single char:
+                if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0b110000xx 0b10xxxxxx
+                    one_char = static_cast<CharT>(((v0 & 3) << 6)
+                                                  + (static_cast<unsigned char>(value[1]) & 0x3F));
+                    return one_char;
+                }
+                // Otherwise we have a single character, but it's > U+00FF
+                throw value_error("Character code point not in range(0x100)");
+            }
+        }
+
+        // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a
+        // surrogate pair with total length 2 instantly indicates a range error (but not a "your
+        // string was too long" error).
+        else if (StringCaster::UTF_N == 16 && str_len == 2) {
+            one_char = static_cast<CharT>(value[0]);
+            if (one_char >= 0xD800 && one_char < 0xE000) {
+                throw value_error("Character code point not in range(0x10000)");
+            }
+        }
+
+        if (str_len != 1) {
+            throw value_error("Expected a character, but multi-character string found");
+        }
+
+        one_char = value[0];
+        return one_char;
+    }
+
+    static constexpr auto name = const_name(PYBIND11_STRING_NAME);
+    template <typename _T>
+    using cast_op_type = pybind11::detail::cast_op_type<_T>;
+};
+
+// Base implementation for std::tuple and std::pair
+template