| |
|
| |
|
| | import collections |
| |
|
| | import caffe2.python.hypothesis_test_util as hu |
| | import hypothesis.strategies as st |
| | from caffe2.python import core, dyndep, workspace |
| | from caffe2.quantization.server import utils as dnnlowp_utils |
| | from caffe2.quantization.server.dnnlowp_test_utils import ( |
| | check_quantized_results_close, |
| | generate_conv_inputs, |
| | generate_convnd_inputs, |
| | run_conv_or_fc, |
| | ) |
| | from hypothesis import assume, given, settings |
| |
|
| |
|
# Load the DNNLOWP operator library at import time.
# NOTE(review): presumably this is what makes the "DNNLOWP"/"DNNLOWP_16"
# engines and the Int8* ops used by the tests below available — confirm.
dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops")
# Initialise Caffe2 globally, passing the --caffe2_omp_num_threads=11 flag.
workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])
| |
|
| |
|
| | class DNNLowPOpConvTest(hu.HypothesisTestCase): |
| | |
@given(
    stride=st.integers(1, 2),
    pad=st.integers(0, 2),
    kernel=st.integers(1, 5),
    dilation=st.integers(1, 2),
    size=st.integers(10, 16),
    group=st.integers(1, 4),
    input_channels_per_group=st.sampled_from([2, 3, 4, 5, 8, 16, 32]),
    output_channels_per_group=st.integers(2, 16),
    batch_size=st.integers(0, 3),
    order=st.sampled_from(["NCHW", "NHWC"]),
    weight_quantized=st.booleans(),
    prepack_weight=st.booleans(),
    share_col_buffer=st.booleans(),
    preserve_activation_sparsity=st.booleans(),
    preserve_weight_sparsity=st.booleans(),
    **hu.gcs_cpu_only
)
@settings(max_examples=10, deadline=None)
def test_dnnlowp_conv_int(
    self,
    stride,
    pad,
    kernel,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    weight_quantized,
    prepack_weight,
    share_col_buffer,
    preserve_activation_sparsity,
    preserve_weight_sparsity,
    gc,
    dc,
):
    """Run one convolution through several op/engine pairs and compare them.

    For every ``(op_type, engine)`` entry in ``op_engine_list`` this builds an
    init net and a main net, optionally wrapping the convolution in
    ``Quantize``/``Dequantize`` ops, optionally feeding it pre-quantized
    weights/bias (``weight_quantized``) and/or a packed weight blob
    (``prepack_weight``), and hands both nets to ``run_conv_or_fc`` together
    with the shared ``outputs`` list. After all variants have run, the
    collected outputs are checked with ``check_quantized_results_close``.

    ``gc`` and ``dc`` are not listed explicitly in ``@given``, so they come
    from ``hu.gcs_cpu_only``. ``gc`` is used as the ``device_option`` of the
    ops in the main net; ``dc`` is not used in this method.
    """
    assume(group == 1 or dilation == 1)
    # Prepacked weights are only exercised together with the NHWC layout.
    assume((not prepack_weight) or order == "NHWC")

    X, W, b = generate_conv_inputs(
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        preserve_activation_sparsity=preserve_activation_sparsity,
        preserve_weight_sparsity=preserve_weight_sparsity,
    )

    # Shared across all variants: passed to run_conv_or_fc on every iteration
    # and read back below via len(outputs) and outputs[0][0].
    outputs = []

    op_engine_list = [
        ("Conv", ""),
        ("Conv", "DNNLOWP"),
        ("Conv", "DNNLOWP_16"),
        ("Int8Conv", "DNNLOWP"),
    ]

    for op_type, engine in op_engine_list:
        init_net = core.Net("test_init_net")
        net = core.Net("test_net")

        do_quantize = "DNNLOWP" in engine
        do_dequantize = "DNNLOWP" in engine
        # Pre-quantized weights are only used once at least one earlier
        # variant has produced a result, because the conv op is then
        # annotated with parameters derived from outputs[0] (see below).
        do_quantize_weight = (
            engine == "DNNLOWP" and weight_quantized and len(outputs) > 0
        )
        do_prepack_weight = engine == "DNNLOWP" and prepack_weight

        if do_quantize:
            quantize = core.CreateOperator(
                "Quantize",
                ["X"],
                ["X_q"],
                preserve_activation_sparsity=preserve_activation_sparsity,
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([quantize])

        # An empty X (batch_size may be 0) has no min/max, so fall back to 0.
        X_min = 0 if X.size == 0 else X.min()
        X_max = 0 if X.size == 0 else X.max()
        x_q_param = dnnlowp_utils.choose_quantization_params(
            X_min, X_max, preserve_activation_sparsity
        )
        if do_quantize_weight:
            int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                W, "W_q", preserve_weight_sparsity
            )
            init_net.Proto().op.extend([int8_given_tensor_fill])

            # The bias fill is built from both the input and the weight
            # quantization parameters.
            int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                b, "b_q", x_q_param, w_q_param
            )
            init_net.Proto().op.extend([int8_bias_tensor_fill])

        if do_prepack_weight:
            inputs = ["W_q" if do_quantize_weight else "W"]
            if do_dequantize:
                inputs += ["b_q" if do_quantize_weight else "b"]
            pack = core.CreateOperator(
                "Int8ConvPackWeight",
                inputs,
                ["W_packed"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                in_scale=x_q_param.scale,
            )
            init_net.Proto().op.extend([pack])

        conv = core.CreateOperator(
            op_type,
            [
                "X_q" if do_quantize else "X",
                "W_packed"
                if do_prepack_weight
                else ("W_q" if do_quantize_weight else "W"),
                "b_q" if do_quantize_weight else "b",
            ],
            ["Y_q" if do_dequantize else "Y"],
            stride=stride,
            kernel=kernel,
            dilation=dilation,
            pad=pad,
            order=order,
            shared_buffer=(1 if share_col_buffer else 0),
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
            engine=engine,
            group=group,
            device_option=gc,
        )
        if do_quantize_weight or do_prepack_weight:
            # Annotate the conv op using the first field of the first
            # collected output.
            # NOTE(review): presumably outputs[0][0] is the fp32 reference Y
            # and this fixes the output quantization range up front — confirm
            # against run_conv_or_fc / add_quantization_param_args.
            dnnlowp_utils.add_quantization_param_args(
                conv, outputs[0][0], preserve_activation_sparsity
            )
        net.Proto().op.extend([conv])

        if do_dequantize:
            dequantize = core.CreateOperator(
                "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([dequantize])

        run_conv_or_fc(
            self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
        )

    check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
| |
|
| | |
@given(
    stride=st.integers(1, 2),
    pad=st.integers(0, 2),
    kernel=st.integers(1, 5),
    dilation=st.integers(1, 2),
    size=st.integers(10, 16),
    group=st.integers(1, 4),
    input_channels_per_group=st.sampled_from([2, 3, 4, 5, 8, 16, 32]),
    output_channels_per_group=st.integers(2, 16),
    batch_size=st.integers(0, 3),
    order=st.sampled_from(["NCHW", "NHWC"]),
    share_col_buffer=st.booleans(),
    **hu.gcs_cpu_only
)
@settings(max_examples=10, deadline=None)
def test_dnnlowp_conv_relu_int(
    self,
    stride,
    pad,
    kernel,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    share_col_buffer,
    gc,
    dc,
):
    """Compare fused ConvRelu variants against an unfused Conv + Relu net.

    The entry with an empty engine builds ``Conv`` followed by an in-place
    ``Relu`` on ``Y``. Every entry whose engine contains ``"DNNLOWP"`` builds
    ``Quantize`` -> ``op_type`` -> ``Dequantize`` instead. Each net is run via
    ``run_conv_or_fc`` (with no init net) and the collected ``outputs`` are
    checked with ``check_quantized_results_close``.

    ``gc`` and ``dc`` are not listed explicitly in ``@given``, so they come
    from ``hu.gcs_cpu_only``. ``gc`` is the ``device_option`` of every op;
    ``dc`` is not used in this method.
    """
    assume(group == 1 or dilation == 1)
    # NCHW examples are kept only when both per-group channel counts are at
    # most 8; NHWC examples are unrestricted. The parentheses spell out the
    # grouping the original expression already had (``and`` binds tighter).
    assume(
        order == "NHWC"
        or (input_channels_per_group <= 8 and output_channels_per_group <= 8)
    )

    X, W, b = generate_conv_inputs(
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
    )

    # Shared across all variants and passed to run_conv_or_fc on every pass.
    outputs = []

    op_engine_list = [
        ("Conv", ""),
        ("ConvRelu", "DNNLOWP"),
        ("ConvRelu", "DNNLOWP_16"),
        ("Int8ConvRelu", "DNNLOWP"),
    ]

    for op_type, engine in op_engine_list:
        net = core.Net("test_net")

        if "DNNLOWP" in engine:
            quantize = core.CreateOperator(
                "Quantize", ["X"], ["X_q"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([quantize])

            conv = core.CreateOperator(
                op_type,
                ["X_q", "W", "b"],
                ["Y_q"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                engine=engine,
                shared_buffer=(1 if share_col_buffer else 0),
                group=group,
                device_option=gc,
            )
            net.Proto().op.extend([conv])

            dequantize = core.CreateOperator(
                "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([dequantize])
        else:
            conv = core.CreateOperator(
                op_type,
                ["X", "W", "b"],
                ["Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                shared_buffer=(1 if share_col_buffer else 0),
                engine=engine,
                group=group,
                device_option=gc,
            )
            net.Proto().op.extend([conv])

            # The plain Conv entry has no fused activation, so apply Relu
            # in place on Y as a separate op.
            relu = core.CreateOperator(
                "Relu", ["Y"], ["Y"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([relu])

        run_conv_or_fc(
            self, None, net, X, W, b, op_type, engine, order, gc, outputs
        )

    check_quantized_results_close(outputs)
| |
|
def _test_dnnlowp_nd_int(
    self,
    stride,
    pad,
    kernels,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    prepack_weight,
    gc,
    dc,
):
    """Shared body for the N-dimensional DNNLOWP convolution tests.

    ``kernels`` is a sequence with one kernel extent per convolved dimension;
    its length is taken as ``ndim``. The scalar ``stride``, ``pad``,
    ``dilation`` and ``size`` values are replicated ``ndim`` times (``pad``
    is replicated ``2 * ndim`` times for the ``pads`` op argument).

    For each ``(op_type, engine)`` entry in ``op_engine_list`` an init net
    and a main net are built and run through ``run_conv_or_fc``; the
    collected ``outputs`` are then checked with
    ``check_quantized_results_close``. ``gc`` is the ``device_option`` of the
    ops in the main net; ``dc`` is accepted but not used here.
    """
    assume(group == 1 or dilation == 1)
    # Prepacked weights are only exercised together with the NHWC layout.
    assume((not prepack_weight) or order == "NHWC")
    ndim = len(kernels)

    X, W, b = generate_convnd_inputs(
        (stride,) * ndim,
        (pad,) * ndim,
        kernels,
        (dilation,) * ndim,
        (size,) * ndim,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
    )

    # Shared across all variants: passed to run_conv_or_fc on every iteration
    # and read back below via len(outputs) and outputs[0][0].
    outputs = []

    op_engine_list = [("Conv", ""), ("Conv", "DNNLOWP_16"), ("Int8Conv", "DNNLOWP")]

    for op_type, engine in op_engine_list:
        init_net = core.Net("test_init_net")
        net = core.Net("test_net")

        do_quantize = "DNNLOWP" in engine
        do_dequantize = "DNNLOWP" in engine
        # Pre-quantized weights are only used once at least one earlier
        # variant has produced a result, because the conv op is then
        # annotated with parameters derived from outputs[0] (see below).
        do_quantize_weight = engine == "DNNLOWP" and len(outputs) > 0
        do_prepack_weight = engine == "DNNLOWP" and prepack_weight

        if do_quantize:
            quantize = core.CreateOperator(
                "Quantize", ["X"], ["X_q"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([quantize])

        # An empty X (batch_size may be 0) has no min/max, so fall back to 0.
        X_min = 0 if X.size == 0 else X.min()
        X_max = 0 if X.size == 0 else X.max()
        x_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max)
        if do_quantize_weight:
            int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                W, "W_q"
            )
            init_net.Proto().op.extend([int8_given_tensor_fill])

            # The bias fill is built from both the input and the weight
            # quantization parameters.
            int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                b, "b_q", x_q_param, w_q_param
            )
            init_net.Proto().op.extend([int8_bias_tensor_fill])

        if do_prepack_weight:
            inputs = ["W_q" if do_quantize_weight else "W"]
            if do_dequantize:
                inputs += ["b_q" if do_quantize_weight else "b"]
            pack = core.CreateOperator(
                "Int8ConvPackWeight",
                inputs,
                ["W_packed"],
                strides=[stride] * ndim,
                kernels=kernels,
                dilations=[dilation] * ndim,
                pads=[pad] * (ndim * 2),
                engine=engine,
                group=group,
                in_scale=x_q_param.scale,
            )
            init_net.Proto().op.extend([pack])

        conv = core.CreateOperator(
            op_type,
            [
                "X_q" if do_quantize else "X",
                "W_packed"
                if do_prepack_weight
                else ("W_q" if do_quantize_weight else "W"),
                "b_q" if do_quantize_weight else "b",
            ],
            ["Y_q" if do_dequantize else "Y"],
            strides=[stride] * ndim,
            kernels=kernels,
            dilations=[dilation] * ndim,
            pads=[pad] * (ndim * 2),
            order=order,
            dequantize_output=not do_dequantize,
            engine=engine,
            group=group,
            device_option=gc,
        )
        if do_quantize_weight or do_prepack_weight:
            # Annotate the conv op using the first field of the first
            # collected output.
            # NOTE(review): presumably outputs[0][0] is the fp32 reference Y
            # and this fixes the output quantization range up front — confirm
            # against run_conv_or_fc / add_quantization_param_args.
            dnnlowp_utils.add_quantization_param_args(conv, outputs[0][0])
        net.Proto().op.extend([conv])

        if do_dequantize:
            dequantize = core.CreateOperator(
                "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([dequantize])

        run_conv_or_fc(
            self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
        )

    check_quantized_results_close(outputs)
| |
|
@given(
    stride=st.integers(1, 2),
    pad=st.integers(0, 2),
    temporal_kernels=st.sampled_from([1, 5]),
    spatial_kernels=st.sampled_from([1, 3]),
    dilation=st.integers(1, 1),
    size=st.sampled_from([5, 8]),
    group=st.integers(1, 2),
    input_channels_per_group=st.sampled_from([2, 3]),
    output_channels_per_group=st.sampled_from([2, 3]),
    batch_size=st.integers(0, 2),
    order=st.sampled_from(["NCHW", "NHWC"]),
    prepack_weight=st.booleans(),
    **hu.gcs_cpu_only
)
@settings(deadline=None, max_examples=50)
def test_dnnlowp_conv3d_int(
    self,
    stride,
    pad,
    temporal_kernels,
    spatial_kernels,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    prepack_weight,
    gc,
    dc,
):
    """Exercise the N-d convolution helper with a three-element kernel tuple.

    The kernel tuple is ``temporal_kernels`` followed by ``spatial_kernels``
    twice; every other argument is forwarded unchanged to
    ``_test_dnnlowp_nd_int``.
    """
    kernel_dims = (temporal_kernels, spatial_kernels, spatial_kernels)
    self._test_dnnlowp_nd_int(
        stride=stride,
        pad=pad,
        kernels=kernel_dims,
        dilation=dilation,
        size=size,
        group=group,
        input_channels_per_group=input_channels_per_group,
        output_channels_per_group=output_channels_per_group,
        batch_size=batch_size,
        order=order,
        prepack_weight=prepack_weight,
        gc=gc,
        dc=dc,
    )
| |
|
@given(
    stride=st.integers(1, 2),
    pad=st.integers(0, 2),
    kernels=st.sampled_from([1, 3]),
    dilation=st.integers(1, 1),
    size=st.sampled_from([5, 8]),
    group=st.integers(1, 2),
    input_channels_per_group=st.sampled_from([2, 3]),
    output_channels_per_group=st.sampled_from([2, 3]),
    batch_size=st.integers(0, 2),
    order=st.sampled_from(["NCHW", "NHWC"]),
    prepack_weight=st.booleans(),
    **hu.gcs_cpu_only
)
# Disable Hypothesis' per-example deadline, as the other @given tests in this
# file already do; this test drives the same helper as the 3-D variant. The
# number of examples is deliberately left at the active profile's value.
@settings(deadline=None)
def test_dnnlowp_conv1d_int(
    self,
    stride,
    pad,
    kernels,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    prepack_weight,
    gc,
    dc,
):
    """Exercise the N-d convolution helper with a single-element kernel tuple.

    ``kernels`` is a scalar here; it is wrapped into a 1-tuple and every
    other argument is forwarded unchanged to ``_test_dnnlowp_nd_int``.
    """
    self._test_dnnlowp_nd_int(
        stride,
        pad,
        (kernels,),
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        gc,
        dc,
    )
| |
|