|
|
import numpy as np |
|
|
|
|
|
|
|
|
def get_test_data(
    train_samples, test_samples, input_shape, num_classes, random_seed=None
):
    """Generates balanced, stratified synthetic test data to train a model on.

    Every class is assigned a random template of shape `input_shape`; each
    sample is its class template plus standard normal noise. Labels are
    spread as evenly as possible over the classes, both in the full dataset
    and in each of the two splits (per-class counts differ by at most one).

    Note that this function reseeds NumPy's global random state through
    `np.random.seed(random_seed)`.

    Args:
        train_samples: Integer, how many training samples to generate.
        test_samples: Integer, how many test samples to generate.
        input_shape: Tuple of integers, shape of the inputs.
        num_classes: Integer, number of classes for the data and targets.
            Must be at least 1.
        random_seed: Integer, random seed used by Numpy to generate data.

    Returns:
        A tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        `x_train` and `x_test` are `float32` arrays of shape
        `(train_samples,) + input_shape` and `(test_samples,) + input_shape`.
        `y_train` and `y_test` are `int64` arrays of class indices.

    Raises:
        ValueError: If `num_classes` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(
            "`num_classes` must be a positive integer. "
            f"Received: num_classes={num_classes}"
        )
    input_shape = tuple(input_shape)

    np.random.seed(random_seed)

    total_samples = train_samples + test_samples

    # Balanced labels: `samples_per_class` of every class, then the leftover
    # samples (fewer than `num_classes`) go to classes 0, 1, 2, ... in order.
    samples_per_class = total_samples // num_classes
    extra_samples = total_samples - samples_per_class * num_classes
    y = np.concatenate(
        [
            np.repeat(
                np.arange(num_classes, dtype=np.int64), samples_per_class
            ),
            np.arange(extra_samples, dtype=np.int64),
        ]
    )

    # One random template per class; a sample is its class template plus
    # unit-variance Gaussian noise. The RNG is consumed in the same order as
    # a per-sample loop would (templates first, then noise sample by sample).
    templates = 2 * num_classes * np.random.random((num_classes,) + input_shape)
    noise = np.random.normal(
        loc=0, scale=1.0, size=(total_samples,) + input_shape
    )
    x = (templates[y] + noise).astype(np.float32)

    # Shuffle the whole dataset so that class membership is not ordered.
    indices = np.arange(total_samples)
    np.random.shuffle(indices)
    x, y = x[indices], y[indices]

    # Stratified split. When `train_samples` is not a multiple of
    # `num_classes`, the first `train_remainder` classes contribute one more
    # training sample so the train split has exactly `train_samples` rows.
    # Those classes always have enough samples: the leftover labels above
    # were also handed out starting from class 0.
    base_train_count, train_remainder = divmod(train_samples, num_classes)
    train_parts, test_parts = [], []
    for cls in range(num_classes):
        cls_indices = np.where(y == cls)[0]
        np.random.shuffle(cls_indices)
        train_count = base_train_count + (1 if cls < train_remainder else 0)
        train_parts.append(cls_indices[:train_count])
        test_parts.append(cls_indices[train_count:])

    # Indexing `x`/`y` (instead of rebuilding arrays from Python lists)
    # keeps the trailing shape and dtype even when a split is empty.
    train_selection = np.concatenate(train_parts)
    test_selection = np.concatenate(test_parts)
    x_train, y_train = x[train_selection], y[train_selection]
    x_test, y_test = x[test_selection], y[test_selection]

    # The splits are grouped by class at this point; shuffle each of them.
    train_indices = np.arange(len(x_train))
    test_indices = np.arange(len(x_test))
    np.random.shuffle(train_indices)
    np.random.shuffle(test_indices)

    x_train, y_train = x_train[train_indices], y_train[train_indices]
    x_test, y_test = x_test[test_indices], y_test[test_indices]

    return (x_train, y_train), (x_test, y_test)
|
|
|
|
|
|
|
|
def named_product(*args, **kwargs):
    """Builds named test cases from the cartesian product of parameters.

    The returned list is meant to be passed to the
    `@parameterized.named_parameters` decorator. It is a replacement for
    `@parameterized.product` which adds explicit test case names.

    For example, this code:
    ```
    class NamedExample(parameterized.TestCase):
        @parameterized.named_parameters(
            named_product(
                [
                    {'testcase_name': 'negative', 'x': -1},
                    {'testcase_name': 'positive', 'x': 1},
                    {'testcase_name': 'zero', 'x': 0},
                ],
                numeral_type=[float, int],
            )
        )
        def test_conversion(self, x, numeral_type):
            self.assertEqual(numeral_type(x), x)
    ```
    produces six tests (note that absl will reorder them by name):
    - `NamedExample::test_conversion_negative_float`
    - `NamedExample::test_conversion_positive_float`
    - `NamedExample::test_conversion_zero_float`
    - `NamedExample::test_conversion_negative_int`
    - `NamedExample::test_conversion_positive_int`
    - `NamedExample::test_conversion_zero_int`

    This function is also useful when there is no product at all, simply to
    generate test case names for a single argument:
    ```
    @parameterized.named_parameters(named_product(numeral_type=[float, int]))
    ```

    Args:
        *args: Each positional parameter is a sequence of keyword arg dicts.
            Every test case generated will include exactly one dict from
            each positional parameter. These are merged to form the overall
            arguments of the test case. Each dict must contain a
            `"testcase_name"` key whose value is combined with the others
            to generate the test case name.
        **kwargs: A mapping of parameter names to their possible values.
            Possible values should be given as either a list or a tuple.
            A lowercased string representation of each value (its
            `__name__` when it has one) is used in the test case name.

    Returns:
        A list of dicts, one per parameter combination, to pass to
        `@parameterized.named_parameters`. Name parts are joined with `"_"`
        in the order: positional parameters first, then keyword parameters.
    """
    no_name = object()

    def describe(value):
        # Prefer `__name__` (classes, functions) over the generic `str()`.
        label = getattr(value, "__name__", no_name)
        if label is no_name:
            label = str(value)
        return label.lower()

    def extend_case(case, params):
        # Append this group's name to the name accumulated so far.
        name = case.get("testcase_name", "")
        if name:
            name = name + "_"
        name = name + params["testcase_name"]
        # Merge the parameters, then restore the combined name that the
        # merge has just overwritten.
        extended = dict(case)
        extended.update(params)
        extended["testcase_name"] = name
        return extended

    # Express every keyword argument in the same "sequence of dicts" format
    # as the positional arguments.
    keyword_groups = tuple(
        tuple(
            {"testcase_name": describe(option), param: option}
            for option in options
        )
        for param, options in kwargs.items()
    )

    # Start from a single empty case and multiply it by one group at a time.
    # Within a group, the existing cases vary fastest.
    cases = [{}]
    for group in (*args, *keyword_groups):
        cases = [
            extend_case(case, params) for params in group for case in cases
        ]

    return cases
|
|
|