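"""Tests for Caffe2 convolution operators.

Hypothesis-driven checks of Conv/Conv1D/Conv2D/Conv3D across layouts
(NCHW, NHWC), engines ("", EIGEN, CUDNN/MIOPEN, MKLDNN), and devices,
covering gradients, cross-layout consistency, forced cuDNN algorithms,
and the use_cudnn/engine keyword interactions. Run this file directly
(it calls unittest.main()) or through pytest.
"""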
import collections
import functools
import unittest

import caffe2.python._import_c_extension as C
import caffe2.python.hip_test_util as hiputl
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.serialized_test.serialized_test_util as serial
import hypothesis.strategies as st
import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import brew, core, utils, workspace
from caffe2.python.model_helper import ModelHelper
from hypothesis import assume, given, settings
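
# Helpers for gating cuDNN-specific configurations and for sampling
# cuDNN algorithm indices to force via the force_algo_* operator args.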

def _cudnn_supports(dilation=False, nhwc=False, backward=False):
    """Return True if cuDNN supports this configuration."""
    v = workspace.GetCuDNNVersion()
    if backward:
        if nhwc:
            # NHWC is not supported for backward ops.
            return False
    else:
        if dilation and v < 6000:
            # Dilation is not supported before cuDNN v6.
            return False
        if dilation and nhwc:
            # Dilation and NHWC are not supported together.
            return False
    return True


def _cudnn_convolution_algo_count(direction):
    """Return a Hypothesis strategy over cuDNN algorithm indices.

    Falls back to the sentinel -1 (do not force an algorithm) when the
    C extension does not expose the algorithm counts, e.g. in builds
    without cuDNN.
    """
    try:
        if direction == "fwd":
            return st.integers(0, C.cudnn_convolution_fwd_algo_count - 1)
        elif direction == "dgrad":
            return st.integers(0, C.cudnn_convolution_bwd_data_algo_count - 1)
        elif direction == "wgrad":
            return st.integers(0, C.cudnn_convolution_bwd_filter_algo_count - 1)
        else:
            assert False, "unknown direction: " + direction
    except Exception:
        return st.sampled_from([-1])


class TestConvolution(serial.SerializedTestCase):

    @given(
        op_type=st.sampled_from(["Conv", "Conv2D"]),
        stride_h=st.integers(1, 3),
        stride_w=st.integers(1, 3),
        pad_t=st.integers(0, 3),
        pad_l=st.integers(0, 3),
        pad_b=st.integers(0, 3),
        pad_r=st.integers(0, 3),
        kernel=st.integers(3, 5),
        size=st.integers(1, 8),
        input_channels=st.integers(1, 3),
        output_channels=st.integers(1, 3),
        batch_size=st.integers(0, 3),
        group=st.integers(1, 2),
        order=st.sampled_from(["NCHW", "NHWC"]),
        engine=st.sampled_from(["", "EIGEN"]),
        shared_buffer=st.booleans(),
        use_bias=st.booleans(),
        **hu.gcs
    )
    @settings(deadline=None, max_examples=50)
    def test_convolution_separate_stride_pad_gradients(
        self,
        op_type,
        stride_h,
        stride_w,
        pad_t,
        pad_l,
        pad_b,
        pad_r,
        kernel,
        size,
        input_channels,
        output_channels,
        batch_size,
        group,
        order,
        engine,
        shared_buffer,
        use_bias,
        gc,
        dc,
    ):
        # Group conv in NHWC is only exercised on CPU here.
        assume(group == 1 or order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
        if group != 1 and order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
        # Group conv is not supported by the EIGEN engine.
        assume(group == 1 or engine != "EIGEN")

        input_channels *= group
        output_channels *= group

        op = core.CreateOperator(
            op_type,
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride_h=stride_h,
            stride_w=stride_w,
            pad_t=pad_t,
            pad_l=pad_l,
            pad_b=pad_b,
            pad_r=pad_r,
            kernel=kernel,
            group=group,
            order=order,
            engine=engine,
            shared_buffer=int(shared_buffer),
        )
        X = (
            np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
            - 0.5
        )
        w = (
            np.random.rand(
                output_channels, kernel, kernel, int(input_channels / group)
            ).astype(np.float32)
            - 0.5
        )
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)
            w = utils.NHWC2NCHW(w)

        inputs = [X, w, b] if use_bias else [X, w]

        # Error handling path: a padded input smaller than the kernel
        # must raise rather than compute garbage.
        if size + pad_r + pad_l < kernel or size + pad_t + pad_b < kernel:
            with self.assertRaises(RuntimeError):
                self.assertDeviceChecks(dc, op, inputs, [0])
            return

        self.assertDeviceChecks(dc, op, inputs, [0])
        for i in range(len(inputs)):
            self.assertGradientChecks(gc, op, inputs, i, [0])

    # cuDNN is omitted here: it does not support asymmetric padding
    # (pad_t != pad_b or pad_l != pad_r).
    @given(
        op_type=st.sampled_from(["Conv", "Conv2D"]),
        stride_h=st.integers(1, 3),
        stride_w=st.integers(1, 3),
        pad_t=st.integers(0, 3),
        pad_l=st.integers(0, 3),
        pad_b=st.integers(0, 3),
        pad_r=st.integers(0, 3),
        kernel=st.integers(1, 5),
        size=st.integers(7, 10),
        input_channels=st.integers(1, 8),
        output_channels=st.integers(1, 8),
        batch_size=st.integers(0, 3),
        engine=st.sampled_from(["", "EIGEN"]),
        use_bias=st.booleans(),
        **hu.gcs
    )
    @settings(deadline=None)
    def test_convolution_separate_stride_pad_layout(
        self,
        op_type,
        stride_h,
        stride_w,
        pad_t,
        pad_l,
        pad_b,
        pad_r,
        kernel,
        size,
        input_channels,
        output_channels,
        batch_size,
        engine,
        use_bias,
        gc,
        dc,
    ):
        X = (
            np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
            - 0.5
        )
        w = (
            np.random.rand(output_channels, kernel, kernel, input_channels).astype(
                np.float32
            )
            - 0.5
        )
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        outputs = {}
        for order in ["NCHW", "NHWC"]:
            op = core.CreateOperator(
                op_type,
                ["X", "w", "b"] if use_bias else ["X", "w"],
                ["Y"],
                stride_h=stride_h,
                stride_w=stride_w,
                kernel=kernel,
                pad_t=pad_t,
                pad_l=pad_l,
                pad_b=pad_b,
                pad_r=pad_r,
                order=order,
                engine=engine,
                device_option=gc,
            )
            if order == "NCHW":
                X_f = utils.NHWC2NCHW(X)
                w_f = utils.NHWC2NCHW(w)
            else:
                X_f = X
                w_f = w
            self.ws.create_blob("X").feed(X_f, device_option=gc)
            self.ws.create_blob("w").feed(w_f, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(op)
            outputs[order] = self.ws.blobs["Y"].fetch()
        np.testing.assert_allclose(
            outputs["NCHW"], utils.NHWC2NCHW(outputs["NHWC"]), atol=1e-4, rtol=1e-4
        )

    @given(
        op_type=st.sampled_from(["Conv", "Conv2D"]),
        stride=st.integers(1, 3),
        pad=st.integers(0, 3),
        kernel=st.integers(1, 5),
        dilation=st.integers(1, 3),
        size=st.integers(7, 10),
        input_channels=st.integers(1, 8),
        output_channels=st.integers(1, 8),
        batch_size=st.integers(0, 3),
        group=st.integers(1, 2),
        order=st.sampled_from(["NCHW", "NHWC"]),
        engine=st.sampled_from(["", "CUDNN", "MKLDNN"]),
        use_bias=st.booleans(),
        force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
        force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
        force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
        **hu.gcs
    )
    @settings(max_examples=20, deadline=None)
    def test_convolution_gradients(
        self,
        op_type,
        stride,
        pad,
        kernel,
        dilation,
        size,
        input_channels,
        output_channels,
        batch_size,
        group,
        order,
        engine,
        use_bias,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        # Group conv needs NCHW or CPU, and is not supported by MKLDNN.
        assume(
            group == 1
            or (order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
            and engine != "MKLDNN"
        )
        if group != 1 and order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        input_channels *= group
        output_channels *= group
        dkernel = dilation * (kernel - 1) + 1

        if engine == "CUDNN":
            if hiputl.run_in_hip(gc, dc):
                # Under HIP the CUDNN engine maps to MIOpen, which requires
                # NCHW and does not support dilated group conv.
                assume((order == "NCHW") and not (dilation > 1 and group > 1))
            else:
                assume(
                    _cudnn_supports(
                        dilation=(dilation > 1), nhwc=(order == "NHWC"), backward=True
                    )
                )

        # MKLDNN conv is only exercised with a bias term.
        assume(engine != "MKLDNN" or use_bias is True)

        op = core.CreateOperator(
            op_type,
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            dilation=dilation,
            pad=pad,
            group=group,
            order=order,
            engine=engine,
            force_algo_fwd=force_algo_fwd,
            force_algo_dgrad=force_algo_dgrad,
            force_algo_wgrad=force_algo_wgrad,
        )
        X = (
            np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
            - 0.5
        )
        w = (
            np.random.rand(
                output_channels, kernel, kernel, int(input_channels / group)
            ).astype(np.float32)
            - 0.5
        )
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)
            w = utils.NHWC2NCHW(w)

        inputs = [X, w, b] if use_bias else [X, w]

        # Error handling path: dilated kernel larger than the padded input.
        if size + pad + pad < dkernel:
            with self.assertRaises(RuntimeError):
                self.assertDeviceChecks(dc, op, inputs, [0])
            return

        try:
            self.assertDeviceChecks(dc, op, inputs, [0])
        except RuntimeError as e:
            es = str(e)
            # cuDNN may reject a forced (nonzero) forward algorithm with
            # CUDNN_STATUS_NOT_SUPPORTED; swallow only that case and
            # re-raise everything else.
            if (
                "status == CUDNN_STATUS_SUCCESS" not in es
                or "CUDNN_STATUS_NOT_SUPPORTED" not in es
                or force_algo_fwd == 0
            ):
                raise e

        for i in range(len(inputs)):
            try:
                self.assertGradientChecks(gc, op, inputs, i, [0])
            except RuntimeError as e:
                es = str(e)
                # Likewise, tolerate unsupported-configuration failures only.
                if (
                    "status == CUDNN_STATUS_SUCCESS" not in es
                    or "CUDNN_STATUS_NOT_SUPPORTED" not in es
                ):
                    raise e

    def _nd_convolution(
        self,
        n,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        stride,
        size,
        kernel,
        dilation,
        pad,
        group,
        order,
        use_bias,
        engine,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        # Group conv is only exercised on GPU for NCHW with n > 1;
        # all other grouped cases fall back to CPU.
        assume(
            group == 1
            or (n != 1 and order == "NCHW")
            or gc.device_type == caffe2_pb2.CPU
        )
        if group != 1 and (n == 1 or order == "NHWC"):
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        input_channels = group * input_channels_per_group
        output_channels = group * output_channels_per_group

        dkernel = dilation * (kernel - 1) + 1
        for op_type in ["Conv", "Conv" + str(n) + "D"]:
            op = core.CreateOperator(
                op_type,
                ["X", "w", "b"] if use_bias else ["X", "w"],
                ["Y"],
                strides=[stride] * n,
                kernels=[kernel] * n,
                dilations=[dilation] * n,
                pads=[pad] * n * 2,
                group=group,
                order=order,
                engine=engine,
                force_algo_fwd=force_algo_fwd,
                force_algo_dgrad=force_algo_dgrad,
                force_algo_wgrad=force_algo_wgrad,
            )

            input_dims = [batch_size, input_channels]
            input_dims.extend([size] * n)
            filter_dims = [output_channels, input_channels // group]
            filter_dims.extend([kernel] * n)

            X = np.random.rand(*input_dims).astype(np.float32) - 0.5
            w = np.random.rand(*filter_dims).astype(np.float32) - 0.5
            b = np.random.rand(output_channels).astype(np.float32) - 0.5
            if order == "NHWC":
                X = utils.NCHW2NHWC(X)
                w = utils.NCHW2NHWC(w)

            inputs = [X, w, b] if use_bias else [X, w]

            # Error handling path: dilated kernel larger than the padded input.
            if size + pad + pad < dkernel:
                with self.assertRaises(RuntimeError):
                    self.assertDeviceChecks(dc, op, inputs, [0])
                return

            self.assertDeviceChecks(dc, op, inputs, [0])
            for i in range(len(inputs)):
                self.assertGradientChecks(gc, op, inputs, i, [0])

    @given(
        input_channels=st.integers(1, 3),
        output_channels=st.integers(1, 2),
        batch_size=st.integers(0, 3),
        stride=st.integers(1, 3),
        size=st.integers(7, 10),
        kernel=st.integers(1, 2),
        dilation=st.integers(1, 3),
        pad=st.integers(0, 3),
        group=st.integers(1, 2),
        order=st.sampled_from(["NCHW", "NHWC"]),
        use_bias=st.booleans(),
        engine=st.sampled_from(["", "CUDNN"]),
        force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
        force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
        force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
        **hu.gcs
    )
    @settings(deadline=10000)
    def test_1d_convolution(
        self,
        input_channels,
        output_channels,
        batch_size,
        stride,
        size,
        kernel,
        dilation,
        pad,
        group,
        order,
        use_bias,
        engine,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        if hiputl.run_in_hip(gc, dc):
            # MIOpen (the CUDNN engine under HIP) only supports 2D conv.
            assume(engine != "CUDNN")
        # 1D conv in NHWC is only exercised on CPU.
        assume(order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
        if order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        self._nd_convolution(
            1,
            input_channels,
            output_channels,
            batch_size,
            stride,
            size,
            kernel,
            dilation,
            pad,
            group,
            order,
            use_bias,
            engine,
            force_algo_fwd,
            force_algo_dgrad,
            force_algo_wgrad,
            gc,
            dc,
        )

    @given(
        input_channels=st.integers(1, 2),
        output_channels=st.integers(1, 2),
        batch_size=st.integers(0, 2),
        stride=st.integers(1, 2),
        size=st.integers(4, 5),
        kernel=st.integers(1, 2),
        dilation=st.integers(1, 2),
        pad=st.integers(0, 2),
        group=st.integers(1, 2),
        order=st.sampled_from(["NCHW", "NHWC"]),
        use_bias=st.booleans(),
        engine=st.sampled_from(["", "MIOPEN"]),
        force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
        force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
        force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
        **hu.gcs
    )
    @settings(max_examples=20, deadline=None)
    def test_3d_convolution(
        self,
        input_channels,
        output_channels,
        batch_size,
        stride,
        size,
        kernel,
        dilation,
        pad,
        group,
        order,
        use_bias,
        engine,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        # 3D conv in NHWC is only exercised on CPU.
        assume(order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
        if order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
        self._nd_convolution(
            3,
            input_channels,
            output_channels,
            batch_size,
            stride,
            size,
            kernel,
            dilation,
            pad,
            group,
            order,
            use_bias,
            engine,
            force_algo_fwd,
            force_algo_dgrad,
            force_algo_wgrad,
            gc,
            dc,
        )

    @given(
        op_type=st.sampled_from(["Conv", "Conv3D"]),
        batch_size=st.integers(0, 2),
        stride=st.integers(1, 2),
        size=st.integers(3, 5),
        kernel=st.integers(1, 2),
        dilation=st.integers(1, 2),
        pad=st.integers(0, 2),
        use_bias=st.booleans(),
        force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
        force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
        force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
        **hu.gcs_no_hip
    )
    @settings(deadline=10000)
    def test_3d_convolution_cudnn_nchw(
        self,
        op_type,
        batch_size,
        stride,
        size,
        kernel,
        dilation,
        pad,
        use_bias,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        input_channels = 1
        output_channels = 1
        n = 3
        dkernel = dilation * (kernel - 1) + 1
        order = "NCHW"

        op = core.CreateOperator(
            op_type,
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            strides=[stride] * n,
            kernels=[kernel] * n,
            dilations=[dilation] * n,
            pads=[pad] * n * 2,
            order=order,
            engine="CUDNN",
            force_algo_fwd=force_algo_fwd,
            force_algo_dgrad=force_algo_dgrad,
            force_algo_wgrad=force_algo_wgrad,
        )

        input_dims = [batch_size, input_channels]
        input_dims.extend([size] * n)
        filter_dims = [output_channels, input_channels]
        filter_dims.extend([kernel] * n)
        X = np.random.rand(*input_dims).astype(np.float32) - 0.5
        w = np.random.rand(*filter_dims).astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5

        inputs = [X, w, b] if use_bias else [X, w]

        # Error handling path: dilated kernel larger than the padded input.
        if size + pad + pad < dkernel:
            with self.assertRaises(RuntimeError):
                self.assertDeviceChecks(dc, op, inputs, [0])
            return

        try:
            self.assertDeviceChecks(dc, op, inputs, [0])
        except RuntimeError as e:
            es = str(e)
            # cuDNN may reject a forced (nonzero) forward algorithm with
            # CUDNN_STATUS_NOT_SUPPORTED; swallow only that case and
            # re-raise everything else.
            if (
                "status == CUDNN_STATUS_SUCCESS" not in es
                or "CUDNN_STATUS_NOT_SUPPORTED" not in es
                or force_algo_fwd == 0
            ):
                raise e

        for i in range(len(inputs)):
            try:
                self.assertGradientChecks(gc, op, inputs, i, [0])
            except RuntimeError as e:
                es = str(e)
                # Likewise, tolerate unsupported-configuration failures only.
                if (
                    "status == CUDNN_STATUS_SUCCESS" not in es
                    or "CUDNN_STATUS_NOT_SUPPORTED" not in es
                ):
                    raise e

    @given(
        op_type=st.sampled_from(["Conv", "Conv2D"]),
        stride=st.integers(1, 3),
        pad=st.integers(0, 3),
        kernel=st.integers(1, 5),
        dilation=st.integers(1, 3),
        size=st.integers(7, 10),
        input_channels=st.integers(1, 8),
        output_channels=st.integers(1, 8),
        batch_size=st.integers(0, 3),
        use_bias=st.booleans(),
        **hu.gcs
    )
    @settings(deadline=None, max_examples=50)
    def test_convolution_layout(
        self,
        op_type,
        stride,
        pad,
        kernel,
        dilation,
        size,
        input_channels,
        output_channels,
        batch_size,
        use_bias,
        gc,
        dc,
    ):
        assume(size >= dilation * (kernel - 1) + 1)

        X = (
            np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
            - 0.5
        )
        w = (
            np.random.rand(output_channels, kernel, kernel, input_channels).astype(
                np.float32
            )
            - 0.5
        )
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        Output = collections.namedtuple("Output", ["Y", "engine", "order"])
        outputs = []

        for order in ["NCHW", "NHWC"]:
            engine_list = [""]
            if hiputl.run_in_hip(gc, dc):
                if order == "NCHW":
                    engine_list.append("MIOPEN")
            else:
                if _cudnn_supports(dilation=(dilation > 1), nhwc=(order == "NHWC")):
                    engine_list.append("CUDNN")

            for engine in engine_list:
                op = core.CreateOperator(
                    op_type,
                    ["X", "w", "b"] if use_bias else ["X", "w"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    device_option=gc,
                    exhaustive_search=True,
                )
                if order == "NCHW":
                    X_f = utils.NHWC2NCHW(X)
                    w_f = utils.NHWC2NCHW(w)
                else:
                    X_f = X
                    w_f = w
                self.assertDeviceChecks(
                    dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0]
                )
                self.ws.create_blob("X").feed(X_f, device_option=gc)
                self.ws.create_blob("w").feed(w_f, device_option=gc)
                self.ws.create_blob("b").feed(b, device_option=gc)
                self.ws.run(op)
                outputs.append(
                    Output(Y=self.ws.blobs["Y"].fetch(), engine=engine, order=order)
                )

        def canonical(o):
            if o.order == "NHWC":
                return utils.NHWC2NCHW(o.Y)
            else:
                return o.Y

        for o in outputs:
            np.testing.assert_allclose(
                canonical(outputs[0]), canonical(o), atol=1e-4, rtol=1e-4
            )

    @given(
        num_workers=st.integers(1, 4),
        net_type=st.sampled_from(
            ["simple", "dag"] + (["async_dag"] if workspace.has_gpu_support else [])
        ),
        engine=st.sampled_from(["CUDNN", ""]),
        **hu.gcs_no_hip
    )
    @settings(deadline=None)
    def test_convolution_sync(self, net_type, num_workers, engine, gc, dc):
        m = ModelHelper(name="test_model")
        n = 1
        d = 2
        depth = 3
        iters = 5
        h = 5
        w = 5
        workspace.ResetWorkspace()

        use_cudnn = engine == "CUDNN"

        np.random.seed(1701)
        # Build a binary tree of conv layers, summing pairs of children
        # into each parent node.
        for i in reversed(range(depth)):
            for j in range(2 ** i):
                bottom_1 = "{}_{}".format(i + 1, 2 * j)
                bottom_2 = "{}_{}".format(i + 1, 2 * j + 1)
                mid_1 = "{}_{}_m".format(i + 1, 2 * j)
                mid_2 = "{}_{}_m".format(i + 1, 2 * j + 1)
                top = "{}_{}".format(i, j)
                w1, b1, w2, b2 = np.random.randn(4).tolist()
                brew.conv(
                    m,
                    bottom_1,
                    mid_1,
                    dim_in=d,
                    dim_out=d,
                    kernel=3,
                    weight_init=("ConstantFill", {"value": w1}),
                    bias_init=("ConstantFill", {"value": b1}),
                    cudnn_state=np.random.randint(0, 3),
                    stride=1,
                    pad=1,
                    deterministic=1,
                    use_cudnn=use_cudnn,
                    engine=engine,
                )
                brew.conv(
                    m,
                    bottom_2,
                    mid_2,
                    dim_in=d,
                    dim_out=d,
                    kernel=3,
                    stride=1,
                    pad=1,
                    weight_init=("ConstantFill", {"value": w2}),
                    bias_init=("ConstantFill", {"value": b2}),
                    deterministic=1,
                    cudnn_state=np.random.randint(0, 3),
                    use_cudnn=use_cudnn,
                    engine=engine,
                )
                m.net.Sum([mid_1, mid_2], top)

        m.net.Flatten(["0_0"], ["0_0_flat"])
        m.net.SquaredL2Distance(["0_0_flat", "label"], "xent")
        m.net.AveragedLoss("xent", "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.Proto().device_option.CopyFrom(gc)
        m.param_init_net.Proto().device_option.CopyFrom(gc)
        m.Proto().type = net_type
        m.Proto().num_workers = num_workers
        self.ws.run(m.param_init_net)

        def run():
            np.random.seed(1701)
            input_blobs = ["{}_{}".format(depth, j) for j in range(2 ** depth)]
            for input_blob in input_blobs:
                self.ws.create_blob(input_blob).feed(
                    np.random.randn(n, d, h, w).astype(np.float32), device_option=gc
                )
            self.ws.create_blob("label").feed(
                np.random.randn(n, d * h * w).astype(np.float32), device_option=gc
            )
            self.ws.run(m.net)
            gradients = [
                self.ws.blobs[str(input_to_grad[input_blob])].fetch()
                for input_blob in input_blobs
            ]
            return gradients

        # With deterministic convolutions, repeated runs must agree bit-for-bit.
        outputs = [run() for _ in range(iters)]
        for output in outputs[1:]:
            np.testing.assert_array_equal(outputs[0], output)
            np.testing.assert_allclose(
                np.sum(np.square(output)), 1763719461732352.0, rtol=1e-5
            )

    def test_use_cudnn_engine_interactions(self):
        """Make sure the use_cudnn and engine kwargs work as expected."""
        for model_default in [None, True, False]:
            arg_scope = {}
            if model_default is not None:
                arg_scope["use_cudnn"] = model_default
            else:
                model_default = True

            model = ModelHelper(arg_scope=arg_scope)
            self.assertEqual(model.arg_scope["use_cudnn"], model_default)
            f = functools.partial(brew.conv, model, "conv_in", "conv_out", 10, 10, 5)

            for op_cudnn in [None, True, False]:
                for op_engine in [None, "", "CUDNN"]:
                    kwargs = {}
                    if op_cudnn is not None:
                        kwargs["use_cudnn"] = op_cudnn
                    else:
                        op_cudnn = False
                    if op_engine is not None:
                        kwargs["engine"] = op_engine

                    calculated_cudnn = kwargs.get("use_cudnn", model_default)
                    expected_engine = kwargs.get(
                        "engine", "CUDNN" if calculated_cudnn else ""
                    )

                    if (calculated_cudnn is False and op_engine == "CUDNN") or (
                        calculated_cudnn is True and op_engine == ""
                    ):
                        with self.assertRaises(ValueError):
                            f(**kwargs)
                    else:
                        f(**kwargs)
                        self.assertEqual(model.Proto().op[-1].engine, expected_engine)

    @given(
        op_type=st.sampled_from(["Conv", "Conv2D"]),
        N=st.integers(0, 3),
        G=st.integers(1, 3),
        DX=st.integers(1, 3),
        DY=st.integers(1, 3),
        H=st.integers(1, 3),
        W=st.integers(1, 3),
        use_bias=st.booleans(),
        order=st.sampled_from(["NCHW", "NHWC"]),
        force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
        force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
        force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
        **hu.gcs
    )
    @settings(deadline=10000)
    def test_1x1_conv(
        self,
        op_type,
        N,
        G,
        DX,
        DY,
        H,
        W,
        use_bias,
        order,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        if hiputl.run_in_hip(gc, dc):
            assume(order == "NCHW")
        if order == "NHWC":
            # Group conv is not exercised in NHWC; force a single group.
            G = 1

        C = G * DX
        M = G * DY

        op = core.CreateOperator(
            op_type,
            ["X", "filter", "bias"] if use_bias else ["X", "filter"],
            ["Y"],
            stride_h=1,
            stride_w=1,
            pad_t=0,
            pad_l=0,
            pad_b=0,
            pad_r=0,
            kernel=1,
            order=order,
            group=G,
            force_algo_fwd=force_algo_fwd,
            force_algo_dgrad=force_algo_dgrad,
            force_algo_wgrad=force_algo_wgrad,
        )

        if order == "NCHW":
            X = np.random.randn(N, C, H, W).astype(np.float32)
            filter = np.random.randn(M, DX, 1, 1).astype(np.float32)
        else:
            X = np.random.randn(N, H, W, C).astype(np.float32)
            filter = np.random.randn(M, 1, 1, DX).astype(np.float32)
        bias = np.random.randn(M).astype(np.float32)
        inputs = [X, filter, bias] if use_bias else [X, filter]

        # Reference implementations: a grouped 1x1 conv reduces to a
        # per-group matrix multiply over the flattened spatial dimensions.
        def conv_1x1_nchw_ref(X, filter, bias=None):
            if N == 0:
                Y = np.zeros(shape=(N, M, H, W), dtype=np.float32)
                return [Y]

            X = X.reshape(N, G, DX, -1)
            filter = filter.reshape(G, DY, DX)
            Y = np.zeros(shape=(N, G, DY, H * W), dtype=np.float32)
            for i in range(N):
                for j in range(G):
                    Y[i, j, :, :] = np.dot(filter[j, :, :], X[i, j, :, :])
            Y = Y.reshape(N, M, H, W)
            if bias is not None:
                bias = bias.reshape(1, M, 1, 1)
                Y = np.add(Y, bias)
            return [Y]

        def conv_1x1_nhwc_ref(X, filter, bias=None):
            if N == 0:
                Y = np.zeros(shape=(N, H, W, M), dtype=np.float32)
                return [Y]

            X = X.reshape(N, -1, G, DX)
            filter = filter.reshape(G, DY, DX)
            Y = np.zeros(shape=(N, H * W, G, DY), dtype=np.float32)
            for i in range(N):
                for j in range(G):
                    Y[i, :, j, :] = np.dot(X[i, :, j, :], filter[j, :, :].transpose())
            Y = Y.reshape(N, H, W, M)
            if bias is not None:
                bias = bias.reshape(1, 1, 1, M)
                Y = np.add(Y, bias)
            return [Y]

        if order == "NCHW":
            conv_1x1_ref = conv_1x1_nchw_ref
        else:
            conv_1x1_ref = conv_1x1_nhwc_ref
        self.assertReferenceChecks(
            device_option=gc, op=op, inputs=inputs, reference=conv_1x1_ref
        )
        self.assertDeviceChecks(dc, op, inputs, [0])
        for i in range(len(inputs)):
            self.assertGradientChecks(gc, op, inputs, i, [0])


if __name__ == "__main__":
    unittest.main()