| """CIFAR10 small images classification dataset.""" |
|
|
| import os |
|
|
| import numpy as np |
|
|
| from keras.src import backend |
| from keras.src.api_export import keras_export |
| from keras.src.datasets.cifar import load_batch |
| from keras.src.utils.file_utils import get_file |
|
|
|
|
| @keras_export("keras.datasets.cifar10.load_data") |
| def load_data(): |
| """Loads the CIFAR10 dataset. |
| |
| This is a dataset of 50,000 32x32 color training images and 10,000 test |
| images, labeled over 10 categories. See more info at the |
| [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). |
| |
| The classes are: |
| |
| | Label | Description | |
| |:-----:|-------------| |
| | 0 | airplane | |
| | 1 | automobile | |
| | 2 | bird | |
| | 3 | cat | |
| | 4 | deer | |
| | 5 | dog | |
| | 6 | frog | |
| | 7 | horse | |
| | 8 | ship | |
| | 9 | truck | |
| |
| Returns: |
| Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. |
| |
| **`x_train`**: `uint8` NumPy array of grayscale image data with shapes |
| `(50000, 32, 32, 3)`, containing the training data. Pixel values range |
| from 0 to 255. |
| |
| **`y_train`**: `uint8` NumPy array of labels (integers in range 0-9) |
| with shape `(50000, 1)` for the training data. |
| |
| **`x_test`**: `uint8` NumPy array of grayscale image data with shapes |
| `(10000, 32, 32, 3)`, containing the test data. Pixel values range |
| from 0 to 255. |
| |
| **`y_test`**: `uint8` NumPy array of labels (integers in range 0-9) |
| with shape `(10000, 1)` for the test data. |
| |
| Example: |
| |
| ```python |
| (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data() |
| assert x_train.shape == (50000, 32, 32, 3) |
| assert x_test.shape == (10000, 32, 32, 3) |
| assert y_train.shape == (50000, 1) |
| assert y_test.shape == (10000, 1) |
| ``` |
| """ |
| dirname = "cifar-10-batches-py-target" |
| origin = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" |
| path = get_file( |
| fname=dirname, |
| origin=origin, |
| extract=True, |
| file_hash=( |
| "6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce" |
| ), |
| ) |
|
|
| num_train_samples = 50000 |
|
|
| x_train = np.empty((num_train_samples, 3, 32, 32), dtype="uint8") |
| y_train = np.empty((num_train_samples,), dtype="uint8") |
|
|
| |
| path = os.path.join(path, "cifar-10-batches-py") |
| for i in range(1, 6): |
| fpath = os.path.join(path, "data_batch_" + str(i)) |
| ( |
| x_train[(i - 1) * 10000 : i * 10000, :, :, :], |
| y_train[(i - 1) * 10000 : i * 10000], |
| ) = load_batch(fpath) |
|
|
| fpath = os.path.join(path, "test_batch") |
| x_test, y_test = load_batch(fpath) |
|
|
| y_train = np.reshape(y_train, (len(y_train), 1)) |
| y_test = np.reshape(y_test, (len(y_test), 1)) |
|
|
| if backend.image_data_format() == "channels_last": |
| x_train = x_train.transpose(0, 2, 3, 1) |
| x_test = x_test.transpose(0, 2, 3, 1) |
|
|
| x_test = x_test.astype(x_train.dtype) |
| y_test = y_test.astype(y_train.dtype) |
|
|
| return (x_train, y_train), (x_test, y_test) |
|
|