| |
|
| |
|
| |
|
| |
|
| |
|
| | import functools |
| |
|
| | import hypothesis |
| | from hypothesis import given, settings, HealthCheck |
| | import hypothesis.strategies as st |
| | import numpy as np |
| |
|
| | from caffe2.python import core |
| | import caffe2.python.hypothesis_test_util as hu |
| | import caffe2.python.serialized_test.serialized_test_util as serial |
| |
|
| |
|
| | class TestAdadelta(serial.SerializedTestCase): |
| | @staticmethod |
| | def ref_adadelta(param_in, |
| | mom_in, |
| | mom_delta_in, |
| | grad, lr, |
| | epsilon, |
| | decay, |
| | using_fp16=False): |
| | param_in_f32 = param_in |
| | mom_in_f32 = mom_in |
| | mom_delta_in_f32 = mom_delta_in |
| | if(using_fp16): |
| | param_in_f32 = param_in.astype(np.float32) |
| | mom_in_f32 = mom_in.astype(np.float32) |
| | mom_delta_in_f32 = mom_delta_in.astype(np.float32) |
| |
|
| | mom_out = decay * mom_in_f32 + (1.0 - decay) * grad * grad |
| | new_grad = (np.sqrt(mom_delta_in_f32 + epsilon) / |
| | np.sqrt(mom_out + epsilon)) * grad |
| | param_out = param_in_f32 + lr * new_grad |
| | mom_delta_out = decay * mom_delta_in_f32 + (1.0 - decay |
| | ) * new_grad * new_grad |
| | if(using_fp16): |
| | return (param_out.astype(np.float16), mom_out.astype(np.float16), |
| | mom_delta_out.astype(np.float16)) |
| | else: |
| | return (param_out.astype(np.float32), mom_out.astype(np.float32), |
| | mom_delta_out.astype(np.float32)) |
| |
|
| | @given(inputs=hu.tensors(n=4), |
| | lr=hu.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | epsilon=hu.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | decay=hu.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | **hu.gcs) |
| | @settings(deadline=10000) |
| | def test_adadelta(self, inputs, lr, epsilon, decay, gc, dc): |
| | param, moment, moment_delta, grad = inputs |
| | moment = np.abs(moment) |
| | moment_delta = np.abs(moment_delta) |
| | lr = np.array([lr], dtype=np.float32) |
| |
|
| | op = core.CreateOperator( |
| | "Adadelta", |
| | ["param", "moment", "moment_delta", "grad", "lr"], |
| | ["param", "moment", "moment_delta"], |
| | epsilon=epsilon, |
| | decay=decay, |
| | device_option=gc, |
| | ) |
| |
|
| | self.assertReferenceChecks( |
| | gc, op, |
| | [param, moment, moment_delta, grad, lr], |
| | functools.partial(self.ref_adadelta, epsilon=epsilon, decay=decay)) |
| |
|
| | |
| | |
| | @settings(suppress_health_check=[HealthCheck.filter_too_much], deadline=10000) |
| | @given(inputs=hu.tensors(n=4), |
| | lr=hu.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | epsilon=hu.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | decay=hu.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | **hu.gcs) |
| | def test_sparse_adadelta(self, inputs, lr, epsilon, decay, gc, dc): |
| | param, moment, moment_delta, grad = inputs |
| | moment = np.abs(moment) |
| | moment_delta = np.abs(moment_delta) |
| | lr = np.array([lr], dtype=np.float32) |
| |
|
| | |
| | |
| | indices = np.random.choice(np.arange(grad.shape[0]), |
| | size=np.random.randint(grad.shape[0]), replace=False) |
| |
|
| | |
| | grad = grad[indices] |
| |
|
| | op = core.CreateOperator( |
| | "SparseAdadelta", |
| | ["param", "moment", "moment_delta", "indices", "grad", "lr"], |
| | ["param", "moment", "moment_delta"], |
| | epsilon=epsilon, |
| | decay=decay, |
| | device_option=gc) |
| |
|
| | def ref_sparse(param, moment, moment_delta, indices, grad, lr, decay, |
| | ref_using_fp16): |
| | param_out = np.copy(param) |
| | moment_out = np.copy(moment) |
| | moment_delta_out = np.copy(moment_delta) |
| | for i, index in enumerate(indices): |
| | param_out[index], moment_out[index], moment_delta_out[ |
| | index] = self.ref_adadelta(param[index], moment[index], |
| | moment_delta[index], grad[i], lr, |
| | epsilon, decay, ref_using_fp16) |
| | return (param_out, moment_out, moment_delta_out) |
| |
|
| | ref_using_fp16_values = [False] |
| | if gc == hu.gpu_do: |
| | ref_using_fp16_values.append(True) |
| |
|
| | for ref_using_fp16 in ref_using_fp16_values: |
| | moment_i = None |
| | moment_delta_i = None |
| | param_i = None |
| | if(ref_using_fp16): |
| | moment_i = moment.astype(np.float16) |
| | moment_delta_i = moment_delta.astype(np.float16) |
| | param_i = param.astype(np.float16) |
| | else: |
| | moment_i = moment.astype(np.float32) |
| | moment_delta_i = moment_delta.astype(np.float32) |
| | param_i = param.astype(np.float32) |
| |
|
| | self.assertReferenceChecks(gc, op, [ |
| | param_i, moment_i, moment_delta_i, indices, grad, lr, decay, |
| | ref_using_fp16 |
| | ], ref_sparse) |
| |
|
| | @given(inputs=hu.tensors(n=3), |
| | lr=st.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | epsilon=st.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | decay=st.floats(min_value=0.01, max_value=0.99, |
| | allow_nan=False, allow_infinity=False), |
| | **hu.gcs) |
| | @settings(deadline=None) |
| | def test_sparse_adadelta_empty(self, inputs, lr, epsilon, decay, gc, dc): |
| | param, moment, moment_delta = inputs |
| | moment = np.abs(moment) |
| | lr = np.array([lr], dtype=np.float32) |
| |
|
| | grad = np.empty(shape=(0,) + param.shape[1:], dtype=np.float32) |
| | indices = np.empty(shape=(0,), dtype=np.int64) |
| |
|
| | hypothesis.note('indices.shape: %s' % str(indices.shape)) |
| |
|
| | op = core.CreateOperator( |
| | "SparseAdadelta", |
| | ["param", "moment", "moment_delta", "indices", "grad", "lr"], |
| | ["param", "moment", "moment_delta"], |
| | epsilon=epsilon, |
| | decay=decay, |
| | device_option=gc) |
| |
|
| | def ref_sparse_empty(param, moment, moment_delta, indices, grad, lr, decay): |
| | param_out = np.copy(param) |
| | moment_out = np.copy(moment) |
| | moment_delta_out = np.copy(moment_delta) |
| | return (param_out, moment_out, moment_delta_out) |
| |
|
| | ref_using_fp16_values = [False] |
| | if gc == hu.gpu_do: |
| | ref_using_fp16_values.append(True) |
| |
|
| | for ref_using_fp16 in ref_using_fp16_values: |
| | moment_i = None |
| | moment_delta_i = None |
| | param_i = None |
| | if(ref_using_fp16): |
| | moment_i = moment.astype(np.float16) |
| | moment_delta_i = moment_delta.astype(np.float16) |
| | param_i = param.astype(np.float16) |
| | else: |
| | moment_i = moment.astype(np.float32) |
| | moment_delta_i = moment_delta.astype(np.float32) |
| | param_i = param.astype(np.float32) |
| |
|
| | self.assertReferenceChecks( |
| | gc, |
| | op, |
| | [param_i, moment_i, moment_delta_i, indices, grad, lr, decay], |
| | ref_sparse_empty |
| | ) |
| |
|