| import warnings |
|
|
| from keras.src import backend |
| from keras.src import ops |
| from keras.src import tree |
| from keras.src.api_export import keras_export |
| from keras.src.losses.loss import Loss |
| from keras.src.losses.loss import squeeze_or_expand_to_same_rank |
| from keras.src.saving import serialization_lib |
| from keras.src.utils.numerical_utils import build_pos_neg_masks |
| from keras.src.utils.numerical_utils import normalize |
|
|
|
|
| class LossFunctionWrapper(Loss): |
| def __init__( |
| self, |
| fn, |
| reduction="sum_over_batch_size", |
| name=None, |
| dtype=None, |
| **kwargs, |
| ): |
| super().__init__(name=name, reduction=reduction, dtype=dtype) |
| self.fn = fn |
| self._fn_kwargs = kwargs |
|
|
| def call(self, y_true, y_pred): |
| y_true_y_pred = tree.map_structure( |
| squeeze_or_expand_to_same_rank, y_true, y_pred |
| ) |
| y_true = tree.map_structure_up_to(y_true, lambda x: x[0], y_true_y_pred) |
| y_pred = tree.map_structure_up_to(y_pred, lambda x: x[1], y_true_y_pred) |
| return self.fn(y_true, y_pred, **self._fn_kwargs) |
|
|
| def get_config(self): |
| config = super().get_config() |
| config.update({"fn": serialization_lib.serialize_keras_object(self.fn)}) |
| config.update(serialization_lib.serialize_keras_object(self._fn_kwargs)) |
| return config |
|
|
| @classmethod |
| def from_config(cls, config): |
| if "fn" in config: |
| config = serialization_lib.deserialize_keras_object(config) |
| return cls(**config) |
|
|
| def __repr__(self): |
| return f"<LossFunctionWrapper({self.fn}, kwargs={self._fn_kwargs})>" |
|
|
|
|
| @keras_export("keras.losses.MeanSquaredError") |
| class MeanSquaredError(LossFunctionWrapper): |
| """Computes the mean of squares of errors between labels and predictions. |
| |
| Formula: |
| |
| ```python |
| loss = mean(square(y_true - y_pred)) |
| ``` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
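|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0], [0.0, 0.0]]) |
| y_pred = np.array([[1.0, 1.0], [1.0, 0.0]]) |
| # The per-sample mean squared errors are 0.5 and 0.5, so the |
| # reduced loss is 0.5. |
| mse = keras.losses.MeanSquaredError() |
| loss = mse(y_true, y_pred) |
| ``` |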
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="mean_squared_error", |
| dtype=None, |
| ): |
| super().__init__( |
| mean_squared_error, name=name, reduction=reduction, dtype=dtype |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.MeanAbsoluteError") |
| class MeanAbsoluteError(LossFunctionWrapper): |
| """Computes the mean of absolute difference between labels and predictions. |
| |
| Formula: |
| |
| ```python |
| loss = mean(abs(y_true - y_pred)) |
| ``` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
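|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0], [0.0, 0.0]]) |
| y_pred = np.array([[1.0, 1.0], [1.0, 0.0]]) |
| # The per-sample mean absolute errors are 0.5 and 0.5, so the |
| # reduced loss is 0.5. |
| mae = keras.losses.MeanAbsoluteError() |
| loss = mae(y_true, y_pred) |
| ``` |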
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="mean_absolute_error", |
| dtype=None, |
| ): |
| super().__init__( |
| mean_absolute_error, name=name, reduction=reduction, dtype=dtype |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.MeanAbsolutePercentageError") |
| class MeanAbsolutePercentageError(LossFunctionWrapper): |
| """Computes the mean absolute percentage error between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = 100 * mean(abs((y_true - y_pred) / y_true)) |
| ``` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
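|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative; |
| `y_true` should be nonzero to avoid division by zero): |
|
| ```python |
| y_true = np.array([[2.0, 1.0], [2.0, 4.0]]) |
| y_pred = np.array([[1.0, 1.0], [1.0, 2.0]]) |
| # The per-sample percentage errors are 25.0 and 50.0, so the |
| # reduced loss is 37.5. |
| mape = keras.losses.MeanAbsolutePercentageError() |
| loss = mape(y_true, y_pred) |
| ``` |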
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="mean_absolute_percentage_error", |
| dtype=None, |
| ): |
| super().__init__( |
| mean_absolute_percentage_error, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.MeanSquaredLogarithmicError") |
| class MeanSquaredLogarithmicError(LossFunctionWrapper): |
| """Computes the mean squared logarithmic error between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = mean(square(log(y_true + 1) - log(y_pred + 1))) |
| ``` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
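|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0], [0.0, 0.0]]) |
| y_pred = np.array([[1.0, 1.0], [1.0, 0.0]]) |
| # Equivalent to MSE computed on log(1 + y) values; here the |
| # reduced loss is approximately 0.2402. |
| msle = keras.losses.MeanSquaredLogarithmicError() |
| loss = msle(y_true, y_pred) |
| ``` |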
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="mean_squared_logarithmic_error", |
| dtype=None, |
| ): |
| super().__init__( |
| mean_squared_logarithmic_error, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.CosineSimilarity") |
| class CosineSimilarity(LossFunctionWrapper): |
| """Computes the cosine similarity between `y_true` & `y_pred`. |
| |
| The loss is the negated cosine similarity, so it is a number between |
| -1 and 1: 0 indicates orthogonality, and values closer to -1 indicate |
| greater similarity. This makes it usable as a loss function in a |
| setting where you try to maximize the proximity between predictions |
| and targets. If either `y_true` or `y_pred` is a zero vector, the |
| cosine similarity will be 0 regardless of the proximity between |
| predictions and targets. |
| |
| Formula: |
| |
| ```python |
| loss = -sum(l2_norm(y_true) * l2_norm(y_pred)) |
| ``` |
| |
| Args: |
| axis: The axis along which the cosine similarity is computed |
| (the features axis). Defaults to `-1`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
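|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0], [1.0, 1.0]]) |
| y_pred = np.array([[1.0, 0.0], [1.0, 1.0]]) |
| # The first pair is orthogonal (similarity 0) and the second is |
| # identical (similarity 1), so the reduced loss is -0.5. |
| cosine = keras.losses.CosineSimilarity(axis=-1) |
| loss = cosine(y_true, y_pred) |
| ``` |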
| """ |
|
|
| def __init__( |
| self, |
| axis=-1, |
| reduction="sum_over_batch_size", |
| name="cosine_similarity", |
| dtype=None, |
| ): |
| super().__init__( |
| cosine_similarity, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| axis=axis, |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.Huber") |
| class Huber(LossFunctionWrapper): |
| """Computes the Huber loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| for x in error: |
| if abs(x) <= delta: |
| loss.append(0.5 * x^2) |
| elif abs(x) > delta: |
| loss.append(delta * abs(x) - 0.5 * delta^2) |
| |
| loss = mean(loss, axis=-1) |
| ``` |
| See: [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). |
| |
| Args: |
| delta: A float, the point where the Huber loss function changes from a |
| quadratic to linear. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
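|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0]]) |
| y_pred = np.array([[0.6, 0.4]]) |
| # Both errors satisfy abs(error) = 0.6 <= delta = 1.0, so each |
| # term is 0.5 * 0.6**2 = 0.18 and the reduced loss is 0.18. |
| huber = keras.losses.Huber(delta=1.0) |
| loss = huber(y_true, y_pred) |
| ``` |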
| """ |
|
|
| def __init__( |
| self, |
| delta=1.0, |
| reduction="sum_over_batch_size", |
| name="huber_loss", |
| dtype=None, |
| ): |
| super().__init__( |
| huber, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| delta=delta, |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.LogCosh") |
| class LogCosh(LossFunctionWrapper): |
| """Computes the logarithm of the hyperbolic cosine of the prediction error. |
| |
| Formula: |
| |
| ```python |
| error = y_pred - y_true |
| logcosh = mean(log((exp(error) + exp(-error))/2), axis=-1) |
| ``` |
| where `error = y_pred - y_true`. |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
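|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0], [0.0, 0.0]]) |
| y_pred = np.array([[1.0, 1.0], [0.0, 0.0]]) |
| # The only nonzero error is 1, and log(cosh(1)) is approximately |
| # 0.4338, so the reduced loss is approximately 0.1084. |
| log_cosh = keras.losses.LogCosh() |
| loss = log_cosh(y_true, y_pred) |
| ``` |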
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="log_cosh", |
| dtype=None, |
| ): |
| super().__init__(log_cosh, name=name, reduction=reduction, dtype=dtype) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.Hinge") |
| class Hinge(LossFunctionWrapper): |
| """Computes the hinge loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = maximum(1 - y_true * y_pred, 0) |
| ``` |
| |
| `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are |
| provided we will convert them to -1 or 1. |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
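|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[-1.0, 1.0], [1.0, 1.0]])  # labels in {-1, 1} |
| y_pred = np.array([[0.5, 0.8], [-0.3, 0.9]]) |
| # The per-sample means of maximum(1 - y_true * y_pred, 0) are |
| # 0.85 and 0.7, so the reduced loss is 0.775. |
| loss = keras.losses.Hinge()(y_true, y_pred) |
| ``` |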
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="hinge", |
| dtype=None, |
| ): |
| super().__init__(hinge, name=name, reduction=reduction, dtype=dtype) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.SquaredHinge") |
| class SquaredHinge(LossFunctionWrapper): |
| """Computes the squared hinge loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = square(maximum(1 - y_true * y_pred, 0)) |
| ``` |
| |
| `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are |
| provided we will convert them to -1 or 1. |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
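|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[-1.0, 1.0], [1.0, 1.0]])  # labels in {-1, 1} |
| y_pred = np.array([[0.5, 0.8], [-0.3, 0.9]]) |
| # The per-sample means of the squared hinge terms are 1.145 and |
| # 0.85, so the reduced loss is 0.9975. |
| loss = keras.losses.SquaredHinge()(y_true, y_pred) |
| ``` |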
| """ |
|
|
| def __init__( |
| self, reduction="sum_over_batch_size", name="squared_hinge", dtype=None |
| ): |
| super().__init__( |
| squared_hinge, name=name, reduction=reduction, dtype=dtype |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.CategoricalHinge") |
| class CategoricalHinge(LossFunctionWrapper): |
| """Computes the categorical hinge loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = maximum(neg - pos + 1, 0) |
| ``` |
| |
| where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
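|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.0, 1.0, 0.0]])  # one-hot labels |
| y_pred = np.array([[0.1, 0.7, 0.2]]) |
| # pos = sum(y_true * y_pred) = 0.7 and neg = max((1 - y_true) * |
| # y_pred) = 0.2, so the loss is max(0.2 - 0.7 + 1, 0) = 0.5. |
| loss = keras.losses.CategoricalHinge()(y_true, y_pred) |
| ``` |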
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="categorical_hinge", |
| dtype=None, |
| ): |
| super().__init__( |
| categorical_hinge, name=name, reduction=reduction, dtype=dtype |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.KLDivergence") |
| class KLDivergence(LossFunctionWrapper): |
| """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = y_true * log(y_true / y_pred) |
| ``` |
| |
| `y_true` and `y_pred` are expected to be probability |
| distributions, with values between 0 and 1. They will get |
| clipped to the `[0, 1]` range. |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
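|
| Example: |
|
| A minimal standalone-usage sketch (each row is a probability |
| distribution; the values are illustrative): |
|
| ```python |
| y_true = np.array([[0.5, 0.5]]) |
| y_pred = np.array([[0.75, 0.25]]) |
| # sum(y_true * log(y_true / y_pred)) is approximately 0.1438. |
| kld = keras.losses.KLDivergence() |
| loss = kld(y_true, y_pred) |
| ``` |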
| """ |
|
|
| def __init__( |
| self, reduction="sum_over_batch_size", name="kl_divergence", dtype=None |
| ): |
| super().__init__( |
| kl_divergence, name=name, reduction=reduction, dtype=dtype |
| ) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.Poisson") |
| class Poisson(LossFunctionWrapper): |
| """Computes the Poisson loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = y_pred - y_true * log(y_pred) |
| ``` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
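|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[1.0, 3.0]])  # observed event counts |
| y_pred = np.array([[1.2, 2.5]])  # predicted rates, must be positive |
| loss = keras.losses.Poisson()(y_true, y_pred) |
| ``` |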
| """ |
|
|
| def __init__( |
| self, reduction="sum_over_batch_size", name="poisson", dtype=None |
| ): |
| super().__init__(poisson, name=name, reduction=reduction, dtype=dtype) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.BinaryCrossentropy") |
| class BinaryCrossentropy(LossFunctionWrapper): |
| """Computes the cross-entropy loss between true labels and predicted labels. |
| |
| Use this cross-entropy loss for binary (0 or 1) classification applications. |
| The loss function requires the following inputs: |
| |
| - `y_true` (true label): This is either 0 or 1. |
| - `y_pred` (predicted value): This is the model's prediction, i.e, a single |
| floating-point value which either represents a |
| [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] |
| when `from_logits=True`) or a probability (i.e, value in [0., 1.] when |
| `from_logits=False`). |
| |
| Args: |
| from_logits: Whether to interpret `y_pred` as a tensor of |
| [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we |
| assume that `y_pred` is probabilities (i.e., values in [0, 1]). |
| label_smoothing: Float in range [0, 1]. When 0, no smoothing occurs. |
| When > 0, we compute the loss between the predicted labels |
| and a smoothed version of the true labels, where the smoothing |
| squeezes the labels towards 0.5. Larger values of |
| `label_smoothing` correspond to heavier smoothing. |
| axis: The axis along which to compute crossentropy (the features axis). |
| Defaults to `-1`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Examples: |
| |
| **Recommended Usage:** (set `from_logits=True`) |
| |
| With `compile()` API: |
| |
| ```python |
| model.compile( |
| loss=keras.losses.BinaryCrossentropy(from_logits=True), |
| ... |
| ) |
| ``` |
| |
| As a standalone function: |
| |
| >>> # Example 1: (batch_size = 1, number of samples = 4) |
| >>> y_true = np.array([0, 1, 0, 0]) |
| >>> y_pred = np.array([-18.6, 0.51, 2.94, -12.8]) |
| >>> bce = keras.losses.BinaryCrossentropy(from_logits=True) |
| >>> bce(y_true, y_pred) |
| 0.8654 |
| |
| >>> # Example 2: (batch_size = 2, number of samples = 4) |
| >>> y_true = np.array([[0, 1], [0, 0]]) |
| >>> y_pred = np.array([[-18.6, 0.51], [2.94, -12.8]]) |
| >>> # Using default 'auto'/'sum_over_batch_size' reduction type. |
| >>> bce = keras.losses.BinaryCrossentropy(from_logits=True) |
| >>> bce(y_true, y_pred) |
| 0.8654 |
| >>> # Using 'sample_weight' attribute |
| >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]) |
| 0.243 |
| >>> # Using 'sum' reduction type. |
| >>> bce = keras.losses.BinaryCrossentropy(from_logits=True, |
| ... reduction="sum") |
| >>> bce(y_true, y_pred) |
| 1.730 |
| >>> # Using 'none' reduction type. |
| >>> bce = keras.losses.BinaryCrossentropy(from_logits=True, |
| ... reduction=None) |
| >>> bce(y_true, y_pred) |
| array([0.235, 1.496], dtype=float32) |
| |
| **Default Usage:** (set `from_logits=False`) |
| |
| >>> # Make the following updates to the above "Recommended Usage" section |
| >>> # 1. Set `from_logits=False` |
| >>> keras.losses.BinaryCrossentropy() # OR ...(from_logits=False) |
| >>> # 2. Update `y_pred` to use probabilities instead of logits |
| >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]] |
| """ |
|
|
| def __init__( |
| self, |
| from_logits=False, |
| label_smoothing=0.0, |
| axis=-1, |
| reduction="sum_over_batch_size", |
| name="binary_crossentropy", |
| dtype=None, |
| ): |
| super().__init__( |
| binary_crossentropy, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| from_logits=from_logits, |
| label_smoothing=label_smoothing, |
| axis=axis, |
| ) |
| self.from_logits = from_logits |
| self.label_smoothing = label_smoothing |
| self.axis = axis |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "from_logits": self.from_logits, |
| "label_smoothing": self.label_smoothing, |
| "axis": self.axis, |
| } |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.BinaryFocalCrossentropy") |
| class BinaryFocalCrossentropy(LossFunctionWrapper): |
| """Computes focal cross-entropy loss between true labels and predictions. |
| |
| Binary cross-entropy loss is often used for binary (0 or 1) classification |
| tasks. The loss function requires the following inputs: |
| |
| - `y_true` (true label): This is either 0 or 1. |
| - `y_pred` (predicted value): This is the model's prediction, i.e, a single |
| floating-point value which either represents a |
| [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] |
| when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when |
| `from_logits=False`). |
| |
| According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it |
| helps to apply a "focal factor" to down-weight easy examples and focus more |
| on hard examples. By default, the focal tensor is computed as follows: |
| |
| `focal_factor = (1 - output) ** gamma` for class 1 |
| `focal_factor = output ** gamma` for class 0 |
| where `gamma` is a focusing parameter. When `gamma=0`, this function is |
| equivalent to the binary crossentropy loss. |
| |
| Args: |
| apply_class_balancing: A bool, whether to apply weight balancing on the |
| binary classes 0 and 1. |
| alpha: A weight balancing factor for class 1, default is `0.25` as |
| mentioned in reference [Lin et al., 2018]( |
| https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is |
| `1.0 - alpha`. |
| gamma: A focusing parameter used to compute the focal factor, default is |
| `2.0` as mentioned in the reference |
| [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). |
| from_logits: Whether to interpret `y_pred` as a tensor of |
| [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we |
| assume that `y_pred` are probabilities (i.e., values in `[0, 1]`). |
| label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. |
| When > `0`, we compute the loss between the predicted labels |
| and a smoothed version of the true labels, where the smoothing |
| squeezes the labels towards `0.5`. |
| Larger values of `label_smoothing` correspond to heavier smoothing. |
| axis: The axis along which to compute crossentropy (the features axis). |
| Defaults to `-1`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Examples: |
| |
| With the `compile()` API: |
| |
| ```python |
| model.compile( |
| loss=keras.losses.BinaryFocalCrossentropy( |
| gamma=2.0, from_logits=True), |
| ... |
| ) |
| ``` |
| |
| As a standalone function: |
| |
| >>> # Example 1: (batch_size = 1, number of samples = 4) |
| >>> y_true = np.array([0, 1, 0, 0]) |
| >>> y_pred = np.array([-18.6, 0.51, 2.94, -12.8]) |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... gamma=2, from_logits=True) |
| >>> loss(y_true, y_pred) |
| 0.691 |
| |
| >>> # Apply class weight |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... apply_class_balancing=True, gamma=2, from_logits=True) |
| >>> loss(y_true, y_pred) |
| 0.51 |
| |
| >>> # Example 2: (batch_size = 2, number of samples = 4) |
| >>> y_true = np.array([[0, 1], [0, 0]]) |
| >>> y_pred = np.array([[-18.6, 0.51], [2.94, -12.8]]) |
| >>> # Using default 'auto'/'sum_over_batch_size' reduction type. |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... gamma=3, from_logits=True) |
| >>> loss(y_true, y_pred) |
| 0.647 |
| |
| >>> # Apply class weight |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... apply_class_balancing=True, gamma=3, from_logits=True) |
| >>> loss(y_true, y_pred) |
| 0.482 |
| |
| >>> # Using 'sample_weight' attribute with focal effect |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... gamma=3, from_logits=True) |
| >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]) |
| 0.133 |
| |
| >>> # Apply class weight |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... apply_class_balancing=True, gamma=3, from_logits=True) |
| >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]) |
| 0.097 |
| |
| >>> # Using 'sum' reduction type. |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... gamma=4, from_logits=True, |
| ... reduction="sum") |
| >>> loss(y_true, y_pred) |
| 1.222 |
| |
| >>> # Apply class weight |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... apply_class_balancing=True, gamma=4, from_logits=True, |
| ... reduction="sum") |
| >>> loss(y_true, y_pred) |
| 0.914 |
| |
| >>> # Using 'none' reduction type. |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... gamma=5, from_logits=True, |
| ... reduction=None) |
| >>> loss(y_true, y_pred) |
| array([0.0017, 1.1561], dtype=float32) |
| |
| >>> # Apply class weight |
| >>> loss = keras.losses.BinaryFocalCrossentropy( |
| ... apply_class_balancing=True, gamma=5, from_logits=True, |
| ... reduction=None) |
| >>> loss(y_true, y_pred) |
| array([0.0004, 0.8670], dtype=float32) |
| """ |
|
|
| def __init__( |
| self, |
| apply_class_balancing=False, |
| alpha=0.25, |
| gamma=2.0, |
| from_logits=False, |
| label_smoothing=0.0, |
| axis=-1, |
| reduction="sum_over_batch_size", |
| name="binary_focal_crossentropy", |
| dtype=None, |
| ): |
| super().__init__( |
| binary_focal_crossentropy, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| apply_class_balancing=apply_class_balancing, |
| alpha=alpha, |
| gamma=gamma, |
| from_logits=from_logits, |
| label_smoothing=label_smoothing, |
| axis=axis, |
| ) |
| self.from_logits = from_logits |
| self.label_smoothing = label_smoothing |
| self.axis = axis |
| self.apply_class_balancing = apply_class_balancing |
| self.alpha = alpha |
| self.gamma = gamma |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "from_logits": self.from_logits, |
| "label_smoothing": self.label_smoothing, |
| "axis": self.axis, |
| "apply_class_balancing": self.apply_class_balancing, |
| "alpha": self.alpha, |
| "gamma": self.gamma, |
| } |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.CategoricalCrossentropy") |
| class CategoricalCrossentropy(LossFunctionWrapper): |
| """Computes the crossentropy loss between the labels and predictions. |
| |
| Use this crossentropy loss function when there are two or more label |
| classes. We expect labels to be provided in a `one_hot` representation. If |
| you want to provide labels as integers, please use |
| `SparseCategoricalCrossentropy` loss. There should be `num_classes` |
| floating point values per sample, i.e., the shape of both `y_pred` and |
| `y_true` is `[batch_size, num_classes]`. |
| |
| Args: |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, |
| meaning the confidence on label values are relaxed. For example, if |
| `0.1`, use `0.1 / num_classes` for non-target labels and |
| `0.9 + 0.1 / num_classes` for target labels. |
| axis: The axis along which to compute crossentropy (the features |
| axis). Defaults to `-1`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Examples: |
| |
| Standalone usage: |
| |
| >>> y_true = np.array([[0, 1, 0], [0, 0, 1]]) |
| >>> y_pred = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> cce = keras.losses.CategoricalCrossentropy() |
| >>> cce(y_true, y_pred) |
| 1.177 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> cce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) |
| 0.814 |
| |
| >>> # Using 'sum' reduction type. |
| >>> cce = keras.losses.CategoricalCrossentropy( |
| ... reduction="sum") |
| >>> cce(y_true, y_pred) |
| 2.354 |
| |
| >>> # Using 'none' reduction type. |
| >>> cce = keras.losses.CategoricalCrossentropy( |
| ... reduction=None) |
| >>> cce(y_true, y_pred) |
| array([0.0513, 2.303], dtype=float32) |
| |
| Usage with the `compile()` API: |
| |
| ```python |
| model.compile(optimizer='sgd', |
| loss=keras.losses.CategoricalCrossentropy()) |
| ``` |
| """ |
|
|
| def __init__( |
| self, |
| from_logits=False, |
| label_smoothing=0.0, |
| axis=-1, |
| reduction="sum_over_batch_size", |
| name="categorical_crossentropy", |
| dtype=None, |
| ): |
| super().__init__( |
| categorical_crossentropy, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| from_logits=from_logits, |
| label_smoothing=label_smoothing, |
| axis=axis, |
| ) |
| self.from_logits = from_logits |
| self.label_smoothing = label_smoothing |
| self.axis = axis |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "from_logits": self.from_logits, |
| "label_smoothing": self.label_smoothing, |
| "axis": self.axis, |
| } |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.CategoricalFocalCrossentropy") |
| class CategoricalFocalCrossentropy(LossFunctionWrapper): |
| """Computes the alpha balanced focal crossentropy loss. |
| |
| Use this crossentropy loss function when there are two or more label |
| classes and if you want to handle class imbalance without using |
| `class_weights`. We expect labels to be provided in a `one_hot` |
| representation. |
| |
| According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it |
| helps to apply a focal factor to down-weight easy examples and focus more on |
| hard examples. The general formula for the focal loss (FL) |
| is as follows: |
| |
| `FL(p_t) = -(1 - p_t) ** gamma * log(p_t)` |
| |
| where `p_t` is defined as follows: |
| `p_t = output if y_true == 1, else 1 - output` |
| |
| `(1 - p_t) ** gamma` is the `modulating_factor`, where `gamma` is a focusing |
| parameter. When `gamma` = 0, there is no focal effect on the cross entropy. |
| `gamma` reduces the importance given to simple examples in a smooth manner. |
| |
| The authors use the alpha-balanced variant of focal loss (FL) in the paper: |
| `FL(p_t) = -alpha * (1 - p_t) ** gamma * log(p_t)` |
| |
| where `alpha` is the weight factor for the classes. If `alpha` = 1, the |
| loss won't be able to handle class imbalance properly as all |
| classes will have the same weight. This can be a constant or a list of |
| constants. If alpha is a list, it must have the same length as the number |
| of classes. |
| |
| The formula above can be generalized to: |
| `FL(p_t) = alpha * (1 - p_t) ** gamma * CrossEntropy(y_true, y_pred)` |
| |
| where the minus sign comes from `CrossEntropy(y_true, y_pred)` (CE). |
| |
| Extending this to multi-class case is straightforward: |
| `FL(p_t) = alpha * (1 - p_t) ** gamma * CategoricalCE(y_true, y_pred)` |
| |
| In the snippet below, there are `num_classes` floating point values per |
| example. The shape of both `y_pred` and `y_true` is |
| `(batch_size, num_classes)`. |
| |
| Args: |
| alpha: A weight balancing factor for all classes, default is `0.25` as |
| mentioned in the reference. It can be a list of floats or a scalar. |
| In the multi-class case, alpha may be set by inverse class |
| frequency by using `compute_class_weight` from `sklearn.utils`. |
| gamma: A focusing parameter, default is `2.0` as mentioned in the |
| reference. It helps to gradually reduce the importance given to |
| simple (easy) examples in a smooth manner. |
| from_logits: Whether `output` is expected to be a logits tensor. By |
| default, we consider that `output` encodes a probability |
| distribution. |
| label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, |
| meaning the confidence on label values are relaxed. For example, if |
| `0.1`, use `0.1 / num_classes` for non-target labels and |
| `0.9 + 0.1 / num_classes` for target labels. |
| axis: The axis along which to compute crossentropy (the features |
| axis). Defaults to `-1`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Examples: |
| |
| Standalone usage: |
| |
| >>> y_true = [[0., 1., 0.], [0., 0., 1.]] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> cce = keras.losses.CategoricalFocalCrossentropy() |
| >>> cce(y_true, y_pred) |
| 0.23315276 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> cce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) |
| 0.1632 |
| |
| >>> # Using 'sum' reduction type. |
| >>> cce = keras.losses.CategoricalFocalCrossentropy( |
| ... reduction="sum") |
| >>> cce(y_true, y_pred) |
| 0.46631 |
| |
| >>> # Using 'none' reduction type. |
| >>> cce = keras.losses.CategoricalFocalCrossentropy( |
| ... reduction=None) |
| >>> cce(y_true, y_pred) |
| array([3.2058331e-05, 4.6627346e-01], dtype=float32) |
| |
| Usage with the `compile()` API: |
| |
| ```python |
| model.compile(optimizer='adam', |
| loss=keras.losses.CategoricalFocalCrossentropy()) |
| ``` |
| """ |
|
|
| def __init__( |
| self, |
| alpha=0.25, |
| gamma=2.0, |
| from_logits=False, |
| label_smoothing=0.0, |
| axis=-1, |
| reduction="sum_over_batch_size", |
| name="categorical_focal_crossentropy", |
| dtype=None, |
| ): |
| """Initializes `CategoricalFocalCrossentropy` instance.""" |
| super().__init__( |
| categorical_focal_crossentropy, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| alpha=alpha, |
| gamma=gamma, |
| from_logits=from_logits, |
| label_smoothing=label_smoothing, |
| axis=axis, |
| ) |
| self.from_logits = from_logits |
| self.label_smoothing = label_smoothing |
| self.axis = axis |
| self.alpha = alpha |
| self.gamma = gamma |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "from_logits": self.from_logits, |
| "label_smoothing": self.label_smoothing, |
| "axis": self.axis, |
| "alpha": self.alpha, |
| "gamma": self.gamma, |
| } |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.SparseCategoricalCrossentropy") |
| class SparseCategoricalCrossentropy(LossFunctionWrapper): |
| """Computes the crossentropy loss between the labels and predictions. |
| |
| Use this crossentropy loss function when there are two or more label |
| classes. We expect labels to be provided as integers. If you want to |
| provide labels using `one-hot` representation, please use |
| `CategoricalCrossentropy` loss. There should be `num_classes` floating |
| point values per sample for `y_pred` and a single integer class index |
| per sample for `y_true`. |
| |
| In the snippet below, there is a single class index per example for |
| `y_true` and `num_classes` floating point values per example for |
| `y_pred`. The shape of `y_true` is `[batch_size]` and the shape of `y_pred` |
| is `[batch_size, num_classes]`. |
| |
| Args: |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| ignore_class: Optional integer. The ID of a class to be ignored during |
| loss computation. This is useful, for example, in segmentation |
| problems featuring a "void" class (commonly -1 or 255) in |
| segmentation maps. By default (`ignore_class=None`), all classes |
| are considered. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| axis: The axis along which to compute crossentropy (the features |
| axis). Defaults to `-1`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Examples: |
| |
| >>> y_true = [1, 2] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> scce = keras.losses.SparseCategoricalCrossentropy() |
| >>> scce(y_true, y_pred) |
| 1.177 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> scce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) |
| 0.814 |
| |
| >>> # Using 'sum' reduction type. |
| >>> scce = keras.losses.SparseCategoricalCrossentropy( |
| ... reduction="sum") |
| >>> scce(y_true, y_pred) |
| 2.354 |
| |
| >>> # Using 'none' reduction type. |
| >>> scce = keras.losses.SparseCategoricalCrossentropy( |
| ... reduction=None) |
| >>> scce(y_true, y_pred) |
| array([0.0513, 2.303], dtype=float32) |
| |
| Usage with the `compile()` API: |
| |
| ```python |
| model.compile(optimizer='sgd', |
| loss=keras.losses.SparseCategoricalCrossentropy()) |
| ``` |
| """ |
|
|
| def __init__( |
| self, |
| from_logits=False, |
| ignore_class=None, |
| reduction="sum_over_batch_size", |
| axis=-1, |
| name="sparse_categorical_crossentropy", |
| dtype=None, |
| ): |
| super().__init__( |
| sparse_categorical_crossentropy, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| from_logits=from_logits, |
| ignore_class=ignore_class, |
| axis=axis, |
| ) |
| self.from_logits = from_logits |
| self.ignore_class = ignore_class |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "from_logits": self.from_logits, |
| "ignore_class": self.ignore_class, |
| } |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.CTC") |
| class CTC(LossFunctionWrapper): |
| """CTC (Connectionist Temporal Classification) loss. |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
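|
| Example: |
|
| A minimal standalone-usage sketch (shapes and values are |
| illustrative): `y_true` holds padded integer label sequences, |
| `y_pred` holds per-timestep class scores, and class `0` is assumed |
| to be reserved for the blank index. |
|
| ```python |
| batch_size, time_steps, num_classes = 2, 10, 5 |
| y_true = np.array([[1, 2, 3], [2, 2, 1]])  # padded label sequences |
| y_pred = np.random.random((batch_size, time_steps, num_classes)) |
| loss = keras.losses.CTC()(y_true, y_pred) |
| ``` |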
| """ |
|
|
| def __init__(self, reduction="sum_over_batch_size", name="ctc", dtype=None): |
| super().__init__(ctc, name=name, reduction=reduction, dtype=dtype) |
|
|
| def get_config(self): |
| return Loss.get_config(self) |
|
|
|
|
| @keras_export("keras.losses.Dice") |
| class Dice(LossFunctionWrapper): |
| """Computes the Dice loss value between `y_true` and `y_pred`. |
| |
| Formula: |
| ```python |
| loss = 1 - (2 * sum(y_true * y_pred)) / (sum(y_true) + sum(y_pred)) |
| ``` |
| |
| Args: |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| axis: Tuple for which dimensions the loss is calculated. Defaults to |
| `None`. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Returns: |
| Dice loss value. |
| |
| Example: |
| |
| >>> y_true = [[[[1.0], [1.0]], [[0.0], [0.0]]], |
| ... [[[1.0], [1.0]], [[0.0], [0.0]]]] |
| >>> y_pred = [[[[0.0], [1.0]], [[0.0], [1.0]]], |
| ... [[[0.4], [0.0]], [[0.0], [0.9]]]] |
| >>> axis = (1, 2, 3) |
| >>> loss = keras.losses.Dice(axis=axis, reduction=None)(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([0.5, 0.75757575], shape=(2,), dtype=float32) |
| |
| >>> loss = keras.losses.Dice()(y_true, y_pred) |
| >>> assert loss.shape == () |
| >>> loss |
| array(0.6164384, shape=(), dtype=float32) |
| |
| >>> y_true = np.array(y_true) |
| >>> y_pred = np.array(y_pred) |
| >>> loss = keras.losses.Dice(axis=axis, reduction=None)(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([0.5, 0.75757575], shape=(2,), dtype=float32) |
| |
| """ |
|
|
| def __init__( |
| self, |
| reduction="sum_over_batch_size", |
| name="dice", |
| axis=None, |
| dtype=None, |
| ): |
| super().__init__( |
| dice, name=name, reduction=reduction, dtype=dtype, axis=axis |
| ) |
| self.axis = axis |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update({"axis": self.axis}) |
| return config |
|
|
|
|
| @keras_export("keras.losses.Tversky") |
| class Tversky(LossFunctionWrapper): |
| """Computes the Tversky loss value between `y_true` and `y_pred`. |
| |
| This loss function is weighted by the alpha and beta coefficients |
| that penalize false positives and false negatives. |
| |
| With `alpha=0.5` and `beta=0.5`, the loss value becomes equivalent to |
| Dice Loss. |
| |
| Args: |
| alpha: The coefficient controlling incidence of false positives. |
| Defaults to `0.5`. |
| beta: The coefficient controlling incidence of false negatives. |
| Defaults to `0.5`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| axis: Tuple for which dimensions the loss is calculated. Defaults to |
| `None`. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Returns: |
| Tversky loss value. |
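|
| Example: |
|
| A minimal standalone-usage sketch (the values are illustrative): |
|
| ```python |
| y_true = np.array([[1.0, 0.0, 1.0, 0.0]]) |
| y_pred = np.array([[0.8, 0.1, 0.6, 0.2]]) |
| # alpha weights false positives and beta weights false negatives. |
| tversky = keras.losses.Tversky(alpha=0.7, beta=0.3) |
| loss = tversky(y_true, y_pred) |
| ``` |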
| |
| Reference: |
| |
| - [Salehi et al., 2017](https://arxiv.org/abs/1706.05721) |
| """ |
|
|
| def __init__( |
| self, |
| alpha=0.5, |
| beta=0.5, |
| reduction="sum_over_batch_size", |
| name="tversky", |
| axis=None, |
| dtype=None, |
| ): |
| super().__init__( |
| tversky, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| alpha=alpha, |
| beta=beta, |
| axis=axis, |
| ) |
| self.alpha = alpha |
| self.beta = beta |
| self.axis = axis |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| {"alpha": self.alpha, "beta": self.beta, "axis": self.axis} |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.Circle") |
| class Circle(LossFunctionWrapper): |
| """Computes Circle Loss between integer labels and L2-normalized embeddings. |
| |
| This is a metric learning loss designed to minimize within-class distance |
| and maximize between-class distance in a flexible manner by dynamically |
| adjusting the penalty strength based on optimization status of each |
| similarity score. |
| |
| To use Circle Loss effectively, the model should output embeddings without |
| an activation function (such as a `Dense` layer with `activation=None`) |
| followed by a `UnitNormalization` layer to ensure unit-norm embeddings. |
| |
| Args: |
| gamma: Scaling factor that determines the largest scale of each |
| similarity score. Defaults to `80`. |
| margin: The relaxation factor. Below this value, negatives are |
| up-weighted and positives are down-weighted; above it, negatives |
| are down-weighted and positives are up-weighted. |
| Defaults to `0.4`. |
| remove_diagonal: Boolean, whether to remove self-similarities from the |
| positive mask. Defaults to `True`. |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Examples: |
| |
| Usage with the `compile()` API: |
| |
| ```python |
| model = models.Sequential([ |
| keras.layers.Input(shape=(224, 224, 3)), |
| keras.layers.Conv2D(16, (3, 3), activation='relu'), |
| keras.layers.Flatten(), |
| keras.layers.Dense(64, activation=None), # No activation |
| keras.layers.UnitNormalization() # L2 normalization |
| ]) |
| |
| model.compile(optimizer="adam", loss=keras.losses.Circle()) |
| ``` |
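|
| A standalone-usage sketch (the values are illustrative; the |
| embeddings are L2-normalized by hand to mimic the output of a |
| `UnitNormalization` layer): |
|
| ```python |
| y_true = np.array([0, 0, 1, 1])  # integer class labels |
| embeddings = np.random.random((4, 16)).astype("float32") |
| y_pred = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True) |
| loss = keras.losses.Circle()(y_true, y_pred) |
| ``` |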
| |
| Reference: |
| - [Yifan Sun et al., 2020](https://arxiv.org/abs/2002.10857) |
| |
| """ |
|
|
| def __init__( |
| self, |
| gamma=80.0, |
| margin=0.4, |
| remove_diagonal=True, |
| reduction="sum_over_batch_size", |
| name="circle", |
| dtype=None, |
| ): |
| super().__init__( |
| circle, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| gamma=gamma, |
| margin=margin, |
| remove_diagonal=remove_diagonal, |
| ) |
| self.gamma = gamma |
| self.margin = margin |
| self.remove_diagonal = remove_diagonal |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "gamma": self.gamma, |
| "margin": self.margin, |
| "remove_diagonal": self.remove_diagonal, |
| } |
| ) |
| return config |
|
|
|
|
| @keras_export("keras.losses.CategoricalGeneralizedCrossEntropy") |
| class CategoricalGeneralizedCrossEntropy(LossFunctionWrapper): |
| """Computes the Generalized Cross Entropy loss between `y_true` & `y_pred`. |
| |
| Generalized Cross Entropy (GCE) is a noise-robust loss function |
| that provides better robustness against noisy labels than |
| standard cross entropy. |
| It generalizes both cross entropy and mean absolute error through |
| the parameter q, where values closer to 1 make the loss more robust |
| to noisy labels. |
| |
| Formula: |
| ```python |
| loss = (1 - p**q) / q |
| ``` |
| where `p` is the predicted probability for the true class and `q` |
| is the noise parameter. |
| |
| Args: |
| q: Float in range `(0, 1)`. It is the noise parameter. |
| Controls the behavior of the loss: |
| - As `q` approaches 0: Behaves more like cross entropy |
| - As `q` approaches 1: Behaves more like mean absolute error |
| Defaults to `0.5` |
| reduction: Type of reduction to apply to the loss. In almost all cases |
| this should be `"sum_over_batch_size"`. Supported options are |
| `"sum"`, `"sum_over_batch_size"`, `"mean"`, |
| `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, |
| `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the |
| sample size, and `"mean_with_sample_weight"` sums the loss and |
| divides by the sum of the sample weights. `"none"` and `None` |
| perform no aggregation. Defaults to `"sum_over_batch_size"`. |
| name: Optional name for the loss instance. |
| dtype: The dtype of the loss's computations. Defaults to `None`, which |
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a |
| `"float32"` unless set to different value |
| (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is |
| provided, then the `compute_dtype` will be utilized. |
| |
| Example: |
| ```python |
| y_true = np.array([0, 1, 0, 1]) |
| y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]]) |
| keras.losses.CategoricalGeneralizedCrossEntropy()(y_true, y_pred) |
| ``` |
| |
| References: |
| - [Zhang, Sabuncu, 2018](https://arxiv.org/abs/1805.07836) |
| ("Generalized Cross Entropy Loss for Training |
| Deep Neural Networks with Noisy Labels") |
| """ |
|
|
| def __init__( |
| self, |
| q=0.5, |
| reduction="sum_over_batch_size", |
| name="categorical_generalized_cross_entropy", |
| dtype=None, |
| ): |
| if not 0 < q < 1: |
| raise ValueError("q must be in the interval (0, 1)") |
| super().__init__( |
| categorical_generalized_cross_entropy, |
| name=name, |
| reduction=reduction, |
| dtype=dtype, |
| q=q, |
| ) |
| self.q = q |
|
|
| def get_config(self): |
| config = Loss.get_config(self) |
| config.update( |
| { |
| "q": self.q, |
| } |
| ) |
| return config |
|
|
|
|
| def convert_binary_labels_to_hinge(y_true): |
| """Converts binary labels into -1/1 for hinge loss/metric calculation.""" |
| are_zeros = ops.equal(y_true, 0) |
| are_ones = ops.equal(y_true, 1) |
| is_binary = ops.all((ops.logical_or(are_zeros, are_ones))) |
|
|
| def _convert_binary_labels(): |
| |
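        # Map binary {0, 1} labels to {-1, 1}.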
| return 2.0 * y_true - 1.0 |
|
|
| def _return_labels_unconverted(): |
| |
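        # Leave non-binary labels unchanged.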
| return y_true |
|
|
| updated_y_true = ops.cond( |
| is_binary, _convert_binary_labels, _return_labels_unconverted |
| ) |
| return updated_y_true |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.hinge", |
| "keras.losses.hinge", |
| ] |
| ) |
| def hinge(y_true, y_pred): |
| """Computes the hinge loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1) |
| ``` |
| |
| Args: |
        y_true: The ground truth values, with shape = `[batch_size, d0, ..
            dN]`. `y_true` values are expected to be -1 or 1. If binary
            (0 or 1) labels are provided, they will be converted to -1 or 1.
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Hinge loss values with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.choice([-1, 1], size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.hinge(y_true, y_pred) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
    y_true = ops.cast(y_true, dtype=y_pred.dtype)
| y_true = convert_binary_labels_to_hinge(y_true) |
| return ops.mean(ops.maximum(1.0 - y_true * y_pred, 0.0), axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.squared_hinge", |
| "keras.losses.squared_hinge", |
| ] |
| ) |
| def squared_hinge(y_true, y_pred): |
| """Computes the squared hinge loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1) |
| ``` |
| |
| Args: |
        y_true: The ground truth values, with shape = `[batch_size, d0, ..
            dN]`. `y_true` values are expected to be -1 or 1. If binary
            (0 or 1) labels are provided, they will be converted to -1 or 1.
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Squared hinge loss values with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.choice([-1, 1], size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.squared_hinge(y_true, y_pred) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
| y_true = convert_binary_labels_to_hinge(y_true) |
| return ops.mean( |
| ops.square(ops.maximum(1.0 - y_true * y_pred, 0.0)), axis=-1 |
| ) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.categorical_hinge", |
| "keras.losses.categorical_hinge", |
| ] |
| ) |
| def categorical_hinge(y_true, y_pred): |
| """Computes the categorical hinge loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = maximum(neg - pos + 1, 0) |
| ``` |
| |
| where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)` |
| |
| Args: |
| y_true: The ground truth values. `y_true` values are expected to be |
| either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor) with |
| shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Categorical hinge loss values with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.randint(0, 3, size=(2,)) |
    >>> y_true = np.eye(3)[y_true]
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.categorical_hinge(y_true, y_pred) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
| pos = ops.sum(y_true * y_pred, axis=-1) |
| neg = ops.max((1.0 - y_true) * y_pred, axis=-1) |
| zero = ops.cast(0.0, y_pred.dtype) |
| return ops.maximum(neg - pos + 1.0, zero) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.mean_squared_error", |
| "keras.losses.mean_squared_error", |
| |
| "keras._legacy.losses.mse", |
| "keras._legacy.losses.MSE", |
| "keras._legacy.metrics.mse", |
| "keras._legacy.metrics.MSE", |
| ] |
| ) |
| def mean_squared_error(y_true, y_pred): |
| """Computes the mean squared error between labels and predictions. |
| |
| Formula: |
| |
| ```python |
| loss = mean(square(y_true - y_pred), axis=-1) |
| ``` |
| |
| Example: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.mean_squared_error(y_true, y_pred) |
| |
| Args: |
| y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean squared error values with shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| return ops.mean(ops.square(y_true - y_pred), axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.mean_absolute_error", |
| "keras.losses.mean_absolute_error", |
| |
| "keras._legacy.losses.MAE", |
| "keras._legacy.losses.mae", |
| "keras._legacy.metrics.MAE", |
| "keras._legacy.metrics.mae", |
| ] |
| ) |
| def mean_absolute_error(y_true, y_pred): |
| """Computes the mean absolute error between labels and predictions. |
| |
| ```python |
| loss = mean(abs(y_true - y_pred), axis=-1) |
| ``` |
| |
| Args: |
| y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean absolute error values with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.mean_absolute_error(y_true, y_pred) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| return ops.mean(ops.abs(y_true - y_pred), axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.mean_absolute_percentage_error", |
| "keras.losses.mean_absolute_percentage_error", |
| |
| "keras._legacy.losses.mape", |
| "keras._legacy.losses.MAPE", |
| "keras._legacy.metrics.mape", |
| "keras._legacy.metrics.MAPE", |
| ] |
| ) |
| def mean_absolute_percentage_error(y_true, y_pred): |
| """Computes the mean absolute percentage error between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1) |
| ``` |
| |
    Division by zero is prevented by dividing by
    `maximum(abs(y_true), epsilon)`, where
    `epsilon = keras.backend.epsilon()` (defaults to `1e-7`).
| |
| Args: |
| y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean absolute percentage error values with shape = `[batch_size, d0, .. |
| dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.random(size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.mean_absolute_percentage_error(y_true, y_pred) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| epsilon = ops.convert_to_tensor(backend.epsilon(), dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| diff = ops.abs((y_true - y_pred) / ops.maximum(ops.abs(y_true), epsilon)) |
| return 100.0 * ops.mean(diff, axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.mean_squared_logarithmic_error", |
| "keras.losses.mean_squared_logarithmic_error", |
| |
| "keras._legacy.losses.msle", |
| "keras._legacy.losses.MSLE", |
| "keras._legacy.metrics.msle", |
| "keras._legacy.metrics.MSLE", |
| ] |
| ) |
| def mean_squared_logarithmic_error(y_true, y_pred): |
| """Computes the mean squared logarithmic error between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1) |
| ``` |
| |
    Note that `y_pred` and `y_true` cannot be less than or equal to 0.
    Negative values and 0 values will be replaced with
    `keras.backend.epsilon()` (defaults to `1e-7`).
| |
| Args: |
| y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean squared logarithmic error values with shape = `[batch_size, d0, .. |
| dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.mean_squared_logarithmic_error(y_true, y_pred) |
| """ |
| epsilon = ops.convert_to_tensor(backend.epsilon()) |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| first_log = ops.log(ops.maximum(y_pred, epsilon) + 1.0) |
| second_log = ops.log(ops.maximum(y_true, epsilon) + 1.0) |
| return ops.mean(ops.square(first_log - second_log), axis=-1) |
|
|
|
|
| @keras_export("keras.losses.cosine_similarity") |
| def cosine_similarity(y_true, y_pred, axis=-1): |
| """Computes the cosine similarity between labels and predictions. |
| |
| Formula: |
| ```python |
| loss = -sum(l2_norm(y_true) * l2_norm(y_pred)) |
| ``` |
| |
    Note that it is a number between -1 and 1. When it is a negative number
    between -1 and 0, 0 indicates orthogonality and values closer to -1
    indicate greater similarity. Values closer to 1 indicate greater
    dissimilarity. This makes it usable as a loss function in a setting
    where you try to maximize the proximity between predictions and
    targets. If either `y_true` or `y_pred` is a zero vector, cosine
    similarity will be 0 regardless of the proximity between predictions
    and targets.
| |
| Args: |
| y_true: Tensor of true targets. |
| y_pred: Tensor of predicted targets. |
| axis: Axis along which to determine similarity. Defaults to `-1`. |
| |
| Returns: |
| Cosine similarity tensor. |
| |
| Example: |
| |
| >>> y_true = [[0., 1.], [1., 1.], [1., 1.]] |
| >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]] |
| >>> loss = keras.losses.cosine_similarity(y_true, y_pred, axis=-1) |
| [-0., -0.99999994, 0.99999994] |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| y_pred = normalize(y_pred, axis=axis) |
| y_true = normalize(y_true, axis=axis) |
| return -ops.sum(y_true * y_pred, axis=axis) |
|
|
|
|
| @keras_export(["keras.losses.huber", "keras.metrics.huber"]) |
| def huber(y_true, y_pred, delta=1.0): |
| """Computes Huber loss value. |
| |
| Formula: |
| ```python |
| for x in error: |
| if abs(x) <= delta: |
| loss.append(0.5 * x^2) |
| elif abs(x) > delta: |
| loss.append(delta * abs(x) - 0.5 * delta^2) |
| |
| loss = mean(loss, axis=-1) |
| ``` |
| See: [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). |
| |
| Example: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> loss = keras.losses.huber(y_true, y_pred) |
| 0.155 |
| |
| |
| Args: |
| y_true: tensor of true targets. |
| y_pred: tensor of predicted targets. |
| delta: A float, the point where the Huber loss function changes from a |
| quadratic to linear. Defaults to `1.0`. |
| |
| Returns: |
| Tensor with one scalar loss entry per sample. |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| delta = ops.convert_to_tensor(delta, dtype=y_pred.dtype) |
| error = ops.subtract(y_pred, y_true) |
| abs_error = ops.abs(error) |
| half = ops.convert_to_tensor(0.5, dtype=abs_error.dtype) |
| return ops.mean( |
| ops.where( |
| abs_error <= delta, |
| half * ops.square(error), |
| delta * abs_error - half * ops.square(delta), |
| ), |
| axis=-1, |
| ) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.losses.log_cosh", |
| "keras.metrics.log_cosh", |
| |
| "keras._legacy.losses.logcosh", |
| "keras._legacy.metrics.logcosh", |
| ] |
| ) |
| def log_cosh(y_true, y_pred): |
| """Logarithm of the hyperbolic cosine of the prediction error. |
| |
| Formula: |
| ```python |
| loss = mean(log(cosh(y_pred - y_true)), axis=-1) |
| ``` |
| |
| Note that `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small |
| `x` and to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works |
| mostly like the mean squared error, but will not be so strongly affected by |
| the occasional wildly incorrect prediction. |
| |
| Example: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[1., 1.], [0., 0.]] |
| >>> loss = keras.losses.log_cosh(y_true, y_pred) |
| 0.108 |
| |
| Args: |
| y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Logcosh error values with shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| y_true, y_pred = squeeze_or_expand_to_same_rank(y_true, y_pred) |
| log2 = ops.convert_to_tensor(ops.log(2.0), dtype=y_pred.dtype) |
|
|
| def _logcosh(x): |
| return x + ops.softplus(x * -2.0) - log2 |
|
|
| return ops.mean(_logcosh(y_pred - y_true), axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.kl_divergence", |
| "keras.losses.kl_divergence", |
| |
| "keras._legacy.losses.KLD", |
| "keras._legacy.losses.kld", |
| "keras._legacy.losses.kullback_leibler_divergence", |
| "keras._legacy.metrics.KLD", |
| "keras._legacy.metrics.kld", |
| "keras._legacy.metrics.kullback_leibler_divergence", |
| ] |
| ) |
| def kl_divergence(y_true, y_pred): |
| """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. |
| |
| Formula: |
| |
| ```python |
| loss = y_true * log(y_true / y_pred) |
| ``` |
| |
| `y_true` and `y_pred` are expected to be probability |
| distributions, with values between 0 and 1. They will get |
| clipped to the `[0, 1]` range. |
| |
| Args: |
| y_true: Tensor of true targets. |
| y_pred: Tensor of predicted targets. |
| |
| Returns: |
| KL Divergence loss values with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float32) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.kl_divergence(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> y_true = ops.clip(y_true, 1e-7, 1) |
| >>> y_pred = ops.clip(y_pred, 1e-7, 1) |
| >>> assert np.array_equal( |
| ... loss, np.sum(y_true * np.log(y_true / y_pred), axis=-1)) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, y_pred.dtype) |
| y_true = ops.clip(y_true, backend.epsilon(), 1) |
| y_pred = ops.clip(y_pred, backend.epsilon(), 1) |
| return ops.sum(y_true * ops.log(y_true / y_pred), axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.poisson", |
| "keras.losses.poisson", |
| ] |
| ) |
| def poisson(y_true, y_pred): |
| """Computes the Poisson loss between y_true and y_pred. |
| |
| Formula: |
| |
| ```python |
| loss = y_pred - y_true * log(y_pred) |
| ``` |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Poisson loss values with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = keras.losses.poisson(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> y_pred = y_pred + 1e-7 |
| >>> assert np.allclose( |
| ... loss, np.mean(y_pred - y_true * np.log(y_pred), axis=-1), |
| ... atol=1e-5) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) |
| epsilon = ops.convert_to_tensor(backend.epsilon(), dtype=y_pred.dtype) |
| return ops.mean(y_pred - y_true * ops.log(y_pred + epsilon), axis=-1) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.categorical_crossentropy", |
| "keras.losses.categorical_crossentropy", |
| ] |
| ) |
| def categorical_crossentropy( |
| y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 |
| ): |
| """Computes the categorical crossentropy loss. |
| |
| Args: |
| y_true: Tensor of one-hot true targets. |
| y_pred: Tensor of predicted targets. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For |
| example, if `0.1`, use `0.1 / num_classes` for non-target labels |
| and `0.9 + 0.1 / num_classes` for target labels. |
| axis: Defaults to `-1`. The dimension along which the entropy is |
| computed. |
| |
| Returns: |
| Categorical crossentropy loss value. |
| |
| Example: |
| |
| >>> y_true = [[0, 1, 0], [0, 0, 1]] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> loss = keras.losses.categorical_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([0.0513, 2.303], dtype=float32) |
| """ |
| if isinstance(axis, bool): |
| raise ValueError( |
| "`axis` must be of type `int`. " |
| f"Received: axis={axis} of type {type(axis)}" |
| ) |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
|
|
| if y_pred.shape[-1] == 1: |
| warnings.warn( |
| "In loss categorical_crossentropy, expected " |
| "y_pred.shape to be (batch_size, num_classes) " |
| f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " |
| "Consider using 'binary_crossentropy' if you only have 2 classes.", |
| SyntaxWarning, |
| stacklevel=2, |
| ) |
|
|
| if label_smoothing: |
| num_classes = ops.cast(ops.shape(y_true)[-1], y_pred.dtype) |
| y_true = y_true * (1.0 - label_smoothing) + ( |
| label_smoothing / num_classes |
| ) |
|
|
| return ops.categorical_crossentropy( |
| y_true, y_pred, from_logits=from_logits, axis=axis |
| ) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.categorical_focal_crossentropy", |
| "keras.losses.categorical_focal_crossentropy", |
| ] |
| ) |
| def categorical_focal_crossentropy( |
| y_true, |
| y_pred, |
| alpha=0.25, |
| gamma=2.0, |
| from_logits=False, |
| label_smoothing=0.0, |
| axis=-1, |
| ): |
| """Computes the categorical focal crossentropy loss. |
| |
| Args: |
| y_true: Tensor of one-hot true targets. |
| y_pred: Tensor of predicted targets. |
| alpha: A weight balancing factor for all classes, default is `0.25` as |
| mentioned in the reference. It can be a list of floats or a scalar. |
| In the multi-class case, alpha may be set by inverse class |
| frequency by using `compute_class_weight` from `sklearn.utils`. |
| gamma: A focusing parameter, default is `2.0` as mentioned in the |
| reference. It helps to gradually reduce the importance given to |
| simple examples in a smooth manner. When `gamma` = 0, there is |
| no focal effect on the categorical crossentropy. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability |
| distribution. |
| label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For |
| example, if `0.1`, use `0.1 / num_classes` for non-target labels |
| and `0.9 + 0.1 / num_classes` for target labels. |
| axis: Defaults to `-1`. The dimension along which the entropy is |
| computed. |
| |
| Returns: |
| Categorical focal crossentropy loss value. |
| |
| Example: |
| |
| >>> y_true = [[0, 1, 0], [0, 0, 1]] |
| >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]] |
| >>> loss = keras.losses.categorical_focal_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([2.63401289e-04, 6.75912094e-01], dtype=float32) |
| """ |
| if isinstance(axis, bool): |
| raise ValueError( |
| "`axis` must be of type `int`. " |
| f"Received: axis={axis} of type {type(axis)}" |
| ) |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
|
|
| if y_pred.shape[-1] == 1: |
| warnings.warn( |
| "In loss categorical_focal_crossentropy, expected " |
| "y_pred.shape to be (batch_size, num_classes) " |
| f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " |
| "Consider using 'binary_crossentropy' if you only have 2 classes.", |
| SyntaxWarning, |
| stacklevel=2, |
| ) |
|
|
| if label_smoothing: |
| num_classes = ops.cast(ops.shape(y_true)[-1], y_pred.dtype) |
| y_true = y_true * (1.0 - label_smoothing) + ( |
| label_smoothing / num_classes |
| ) |
|
|
| if from_logits: |
| y_pred = ops.softmax(y_pred, axis=axis) |
|
|
| |
| |
| |
| |
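    # Renormalize so class probabilities sum to 1 along `axis`, then clip
    # away from 0 and 1 so the log below is well-defined.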
| output = y_pred / ops.sum(y_pred, axis=axis, keepdims=True) |
| output = ops.clip(output, backend.epsilon(), 1.0 - backend.epsilon()) |
|
|
| |
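    # Standard categorical cross entropy term.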
| cce = -y_true * ops.log(output) |
|
|
| |
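    # Focal modulation: down-weight well-classified examples with
    # `(1 - p)**gamma`, balanced by `alpha`.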
| modulating_factor = ops.power(1.0 - output, gamma) |
| weighting_factor = ops.multiply(modulating_factor, alpha) |
|
|
| |
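    # Apply the weighting and reduce over the class axis.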
| focal_cce = ops.multiply(weighting_factor, cce) |
| focal_cce = ops.sum(focal_cce, axis=axis) |
| return focal_cce |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.sparse_categorical_crossentropy", |
| "keras.losses.sparse_categorical_crossentropy", |
| ] |
| ) |
| def sparse_categorical_crossentropy( |
| y_true, y_pred, from_logits=False, ignore_class=None, axis=-1 |
| ): |
| """Computes the sparse categorical crossentropy loss. |
| |
| Args: |
| y_true: Ground truth values. |
| y_pred: The predicted values. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| ignore_class: Optional integer. The ID of a class to be ignored during |
| loss computation. This is useful, for example, in segmentation |
| problems featuring a "void" class (commonly -1 or 255) in |
| segmentation maps. By default (`ignore_class=None`), all classes are |
| considered. |
| axis: Defaults to `-1`. The dimension along which the entropy is |
| computed. |
| |
| Returns: |
| Sparse categorical crossentropy loss value. |
| |
    Example:
| |
| >>> y_true = [1, 2] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> loss = keras.losses.sparse_categorical_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([0.0513, 2.303], dtype=float32) |
| """ |
|
|
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = ops.convert_to_tensor(y_true)
    if len(y_true.shape) == len(y_pred.shape) and y_true.shape[-1] == 1:
        y_true = ops.squeeze(y_true, axis=-1)
|
|
    if ignore_class is not None:
        res_shape = ops.shape(y_pred)[:-1]
        valid_mask = ops.not_equal(
            y_true, ops.cast(ignore_class, y_true.dtype)
        )
        y_true = y_true * ops.cast(valid_mask, y_true.dtype)
        y_pred = y_pred * ops.cast(
            ops.expand_dims(valid_mask, -1), y_pred.dtype
        )
|
|
| res = ops.sparse_categorical_crossentropy( |
| y_true, |
| y_pred, |
| from_logits=from_logits, |
| axis=axis, |
| ) |
|
|
| if ignore_class is not None: |
| valid_mask = ops.reshape(valid_mask, res_shape) |
| res = ops.where(valid_mask, res, 0.0) |
| backend.set_keras_mask(res, mask=valid_mask) |
|
|
| return res |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.binary_crossentropy", |
| "keras.losses.binary_crossentropy", |
| ] |
| ) |
| def binary_crossentropy( |
| y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 |
| ): |
| """Computes the binary crossentropy loss. |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels by |
| squeezing them towards 0.5, that is, |
| using `1. - 0.5 * label_smoothing` for the target class |
| and `0.5 * label_smoothing` for the non-target class. |
| axis: The axis along which the mean is computed. Defaults to `-1`. |
| |
| Returns: |
| Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> loss = keras.losses.binary_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([0.916 , 0.714], dtype=float32) |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
|
|
| if label_smoothing: |
| y_true = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing |
|
|
| return ops.mean( |
| ops.binary_crossentropy(y_true, y_pred, from_logits=from_logits), |
| axis=axis, |
| ) |
|
|
|
|
| @keras_export( |
| [ |
| "keras.metrics.binary_focal_crossentropy", |
| "keras.losses.binary_focal_crossentropy", |
| ] |
| ) |
| def binary_focal_crossentropy( |
| y_true, |
| y_pred, |
| apply_class_balancing=False, |
| alpha=0.25, |
| gamma=2.0, |
| from_logits=False, |
| label_smoothing=0.0, |
| axis=-1, |
| ): |
| """Computes the binary focal crossentropy loss. |
| |
    According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
    helps to apply a focal factor to down-weight easy examples and focus
    more on hard examples. By default, the focal tensor is computed as
    follows:
| |
| `focal_factor = (1 - output) ** gamma` for class 1 |
| `focal_factor = output ** gamma` for class 0 |
| where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal |
| effect on the binary crossentropy loss. |
| |
| If `apply_class_balancing == True`, this function also takes into account a |
| weight balancing factor for the binary classes 0 and 1 as follows: |
| |
| `weight = alpha` for class 1 (`target == 1`) |
| `weight = 1 - alpha` for class 0 |
| where `alpha` is a float in the range of `[0, 1]`. |
| |
| Args: |
| y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`. |
| y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`. |
| apply_class_balancing: A bool, whether to apply weight balancing on the |
| binary classes 0 and 1. |
| alpha: A weight balancing factor for class 1, default is `0.25` as |
| mentioned in the reference. The weight for class 0 is `1.0 - alpha`. |
| gamma: A focusing parameter, default is `2.0` as mentioned in the |
| reference. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels by |
| squeezing them towards 0.5, that is, |
| using `1. - 0.5 * label_smoothing` for the target class |
| and `0.5 * label_smoothing` for the non-target class. |
| axis: The axis along which the mean is computed. Defaults to `-1`. |
| |
| Returns: |
| Binary focal crossentropy loss value |
| with shape = `[batch_size, d0, .. dN-1]`. |
| |
| Example: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # In this instance, the first sample in the second batch is the |
| >>> # 'easier' example. |
| >>> focal_loss = keras.losses.binary_focal_crossentropy( |
| ... y_true, y_pred, gamma=2) |
    >>> assert focal_loss.shape == (2,)
| >>> focal_loss |
| array([0.330, 0.206], dtype=float32) |
| >>> # Compare with binary_crossentropy |
| >>> bce_loss = keras.losses.binary_focal_crossentropy( |
| ... y_true, y_pred) |
| >>> bce_loss |
| array([0.916, 0.714], dtype=float32) |
| >>> # Binary focal crossentropy loss attributes more importance to the |
| >>> # harder example which results in a higher loss for the first batch |
| >>> # when normalized by binary cross entropy loss |
    >>> focal_loss / bce_loss
    array([0.360, 0.289], dtype=float32)
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
|
|
| if label_smoothing: |
| y_true = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing |
|
|
| if from_logits: |
| y_pred = ops.sigmoid(y_pred) |
|
|
| bce = ops.binary_crossentropy( |
| target=y_true, |
| output=y_pred, |
| from_logits=False, |
| ) |
|
|
| |
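    # `p_t` is the model's probability for the true class; the focal
    # factor `(1 - p_t)**gamma` down-weights easy examples.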
| p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred) |
| focal_factor = ops.power(1.0 - p_t, gamma) |
|
|
| focal_bce = focal_factor * bce |
|
|
| if apply_class_balancing: |
| weight = y_true * alpha + (1 - y_true) * (1 - alpha) |
| focal_bce = weight * focal_bce |
|
|
| return ops.mean(focal_bce, axis=axis) |
|
|
|
|
| @keras_export("keras.losses.ctc") |
| def ctc(y_true, y_pred): |
| """CTC (Connectionist Temporal Classification) loss. |
| |
| Args: |
| y_true: A tensor of shape `(batch_size, max_length)` containing |
| the true labels in integer format. `0` always represents |
| the blank/mask index and should not be used for classes. |
| y_pred: A tensor of shape `(batch_size, max_length, num_classes)` |
| containing logits (the output of your model). |
| They should *not* be normalized via softmax. |
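
    Returns:
        CTC loss values with shape `(batch_size,)`.

    Example:

    >>> # A minimal sketch with random logits; shapes and values are
    >>> # illustrative. Index 0 is reserved for the blank label.
    >>> y_true = np.array([[1, 2, 3, 0, 0], [2, 4, 0, 0, 0]])
    >>> y_pred = np.random.random(size=(2, 10, 5))
    >>> loss = keras.losses.ctc(y_true, y_pred)
    >>> assert loss.shape == (2,)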
| """ |
| if len(ops.shape(y_true)) != 2: |
| raise ValueError( |
| "Targets `y_true` are expected to be a tensor of shape " |
| "`(batch_size, max_length)` in integer format. " |
| f"Received: y_true.shape={ops.shape(y_true)}" |
| ) |
| if len(ops.shape(y_pred)) != 3: |
| raise ValueError( |
| "Logits `y_pred` are expected to be a tensor of shape " |
| "`(batch_size, max_length, num_classes)`. " |
| f"Received: y_pred.shape={ops.shape(y_pred)}" |
| ) |
|
|
| mask_index = 0 |
| batch_length = ops.shape(y_pred)[0] |
| input_length = ops.shape(y_pred)[1] |
| input_length = input_length * ops.ones((batch_length,), dtype="int32") |
| label_length = ops.cast( |
| ops.sum(y_true != mask_index, axis=-1), dtype="int32" |
| ) |
|
|
| return ops.ctc_loss( |
| y_true, y_pred, label_length, input_length, mask_index=mask_index |
| ) |
|
|
|
|
| @keras_export("keras.losses.dice") |
| def dice(y_true, y_pred, axis=None): |
| """Computes the Dice loss value between `y_true` and `y_pred`. |
| |
| Formula: |
| ```python |
| loss = 1 - (2 * sum(y_true * y_pred)) / (sum(y_true) + sum(y_pred)) |
| ``` |
| |
| Args: |
| y_true: tensor of true targets. |
| y_pred: tensor of predicted targets. |
        axis: Tuple of the dimensions along which the loss is computed.
| |
| Returns: |
| Dice loss value. |
| |
| Example: |
| |
| >>> y_true = [[[[1.0], [1.0]], [[0.0], [0.0]]], |
| ... [[[1.0], [1.0]], [[0.0], [0.0]]]] |
| >>> y_pred = [[[[0.0], [1.0]], [[0.0], [1.0]]], |
| ... [[[0.4], [0.0]], [[0.0], [0.9]]]] |
| >>> axis = (1, 2, 3) |
| >>> loss = keras.losses.dice(y_true, y_pred, axis=axis) |
| >>> assert loss.shape == (2,) |
| >>> loss |
| array([0.5, 0.75757575], shape=(2,), dtype=float32) |
| |
| >>> loss = keras.losses.dice(y_true, y_pred) |
| >>> assert loss.shape == () |
| >>> loss |
| array(0.6164384, shape=(), dtype=float32) |
| |
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
|
|
    intersection = ops.sum(y_true * y_pred, axis=axis)
| dice = ops.divide( |
| 2.0 * intersection, |
| ops.sum(y_true, axis=axis) |
| + ops.sum(y_pred, axis=axis) |
| + backend.epsilon(), |
| ) |
|
|
| return 1 - dice |
|
|
|
|
| @keras_export("keras.losses.tversky") |
| def tversky(y_true, y_pred, alpha=0.5, beta=0.5, axis=None): |
| """Computes the Tversky loss value between `y_true` and `y_pred`. |
| |
| This loss function is weighted by the alpha and beta coefficients |
| that penalize false positives and false negatives. |
| |
| With `alpha=0.5` and `beta=0.5`, the loss value becomes equivalent to |
| Dice Loss. |
| |
| Args: |
| y_true: tensor of true targets. |
| y_pred: tensor of predicted targets. |
| alpha: coefficient controlling incidence of false positives. |
| beta: coefficient controlling incidence of false negatives. |
        axis: Tuple of the dimensions along which the loss is computed.
| |
| Returns: |
| Tversky loss value. |
| |
| Reference: |
| |
| - [Salehi et al., 2017](https://arxiv.org/abs/1706.05721) |
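
    Example:

    >>> # Illustrative flat binary masks; with `alpha > beta`, false
    >>> # positives are penalized more heavily than false negatives.
    >>> y_true = np.array([[1.0, 1.0, 0.0, 0.0]])
    >>> y_pred = np.array([[0.8, 0.6, 0.2, 0.1]])
    >>> loss = keras.losses.tversky(y_true, y_pred, alpha=0.7, beta=0.3)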
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, y_pred.dtype) |
|
|
    intersection = ops.sum(y_true * y_pred, axis=axis)
    # False positives: predicted positive where the ground truth is
    # negative. False negatives: predicted negative where the ground
    # truth is positive. `alpha` and `beta` weight these terms as
    # documented above.
    fp = ops.sum((1 - y_true) * y_pred, axis=axis)
    fn = ops.sum(y_true * (1 - y_pred), axis=axis)
|
|
| tversky = ops.divide( |
| intersection, |
| intersection + fp * alpha + fn * beta + backend.epsilon(), |
| ) |
|
|
| return 1 - tversky |
|
|
|
|
| @keras_export("keras.losses.circle") |
| def circle( |
| y_true, |
| y_pred, |
| ref_labels=None, |
| ref_embeddings=None, |
| remove_diagonal=True, |
| gamma=80, |
| margin=0.4, |
| ): |
| """Computes the Circle loss. |
| |
    Circle loss is designed to minimize within-class distances and maximize
    between-class distances in an L2-normalized embedding space.
| |
| Args: |
| y_true: Tensor with ground truth labels in integer format. |
| y_pred: Tensor with predicted L2 normalized embeddings. |
| ref_labels: Optional integer tensor with labels for reference |
| embeddings. If `None`, defaults to `y_true`. |
| ref_embeddings: Optional tensor with L2 normalized reference embeddings. |
| If `None`, defaults to `y_pred`. |
| remove_diagonal: Boolean, whether to remove self-similarities from |
| positive mask. Defaults to `True`. |
| gamma: Float, scaling factor for the loss. Defaults to `80`. |
| margin: Float, relaxation factor for the loss. Defaults to `0.4`. |
| |
| Returns: |
| Circle loss value. |
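
    Example:

    >>> # Illustrative sketch: four L2-normalized 2D embeddings drawn
    >>> # from two classes.
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_pred = np.array(
    ...     [[1.0, 0.0], [0.8, 0.6], [0.0, 1.0], [0.6, 0.8]])
    >>> loss = keras.losses.circle(y_true, y_pred)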
| """ |
| y_pred = ops.convert_to_tensor(y_pred) |
| y_true = ops.cast(y_true, "int32") |
| ref_embeddings = ( |
| y_pred |
| if ref_embeddings is None |
| else ops.convert_to_tensor(ref_embeddings) |
| ) |
| ref_labels = y_true if ref_labels is None else ops.cast(ref_labels, "int32") |
|
|
| optim_pos = margin |
| optim_neg = 1 + margin |
| delta_pos = margin |
| delta_neg = 1 - margin |
|
|
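    # Embeddings are assumed to be L2-normalized, so the dot product is
    # the cosine similarity and `1 - similarity` is the cosine distance.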
| pairwise_cosine_distances = 1 - ops.matmul( |
| y_pred, ops.transpose(ref_embeddings) |
| ) |
|
|
| pairwise_cosine_distances = ops.maximum(pairwise_cosine_distances, 0.0) |
| positive_mask, negative_mask = build_pos_neg_masks( |
| y_true, |
| ref_labels, |
| remove_diagonal=remove_diagonal, |
| ) |
| positive_mask = ops.cast( |
| positive_mask, dtype=pairwise_cosine_distances.dtype |
| ) |
| negative_mask = ops.cast( |
| negative_mask, dtype=pairwise_cosine_distances.dtype |
| ) |
|
|
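    # Weight each pair by its deviation from the optimum similarity
    # (`optim_pos` for positives, `optim_neg` for negatives), as in the
    # Circle loss paper's self-paced weighting.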
| pos_weights = optim_pos + pairwise_cosine_distances |
| pos_weights = pos_weights * positive_mask |
| pos_weights = ops.maximum(pos_weights, 0.0) |
| neg_weights = optim_neg - pairwise_cosine_distances |
| neg_weights = neg_weights * negative_mask |
| neg_weights = ops.maximum(neg_weights, 0.0) |
|
|
| pos_dists = delta_pos - pairwise_cosine_distances |
| neg_dists = delta_neg - pairwise_cosine_distances |
|
|
| pos_wdists = -1 * gamma * pos_weights * pos_dists |
| neg_wdists = gamma * neg_weights * neg_dists |
|
|
| p_loss = ops.logsumexp( |
| ops.where(positive_mask, pos_wdists, float("-inf")), |
| axis=1, |
| ) |
| n_loss = ops.logsumexp( |
| ops.where(negative_mask, neg_wdists, float("-inf")), |
| axis=1, |
| ) |
|
|
| circle_loss = ops.softplus(p_loss + n_loss) |
| backend.set_keras_mask(circle_loss, circle_loss > 0) |
| return circle_loss |
|
|
|
|
| @keras_export("keras.losses.categorical_generalized_cross_entropy") |
| def categorical_generalized_cross_entropy(y_true, y_pred, q): |
| """Computes the Generalized Cross Entropy loss. |
| |
| Generalized Cross Entropy (GCE) is a noise-robust loss function that |
| provides better robustness against noisy labels than standard cross entropy. |
| It generalizes both cross entropy and mean absolute error through |
| the parameter q, where values closer to 1 make the loss more robust |
| to noisy labels. |
| |
| Formula: |
| ```python |
| loss = (1 - p**q) / q |
| ``` |
| where `p` is the predicted probability for the true class and `q` |
| is the noise parameter. |
| |
| Args: |
| y_true: Ground truth labels. Expected to contain *integer class indices* |
| with shape `[batch_size]` or `[batch_size, 1]`. |
| y_pred: The predicted class probabilities, with shape |
| `[batch_size, num_classes]`. |
| q: Float in range `(0, 1)`. It is the noise parameter. |
| Controls the behavior of the loss: |
| - As `q` approaches 0: Behaves more like cross entropy |
| - As `q` approaches 1: Behaves more like mean absolute error |
| |
| Returns: |
| GCE loss values with shape `[batch_size]`. |
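
    Example:

    >>> y_true = np.array([0, 1, 0, 1])
    >>> y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])
    >>> loss = keras.losses.categorical_generalized_cross_entropy(
    ...     y_true, y_pred, q=0.5)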
| ``` |
| |
| References: |
| - [Zhang, Sabuncu, 2018](https://arxiv.org/abs/1805.07836) |
| ("Generalized Cross Entropy Loss for Training |
| Deep Neural Networks with Noisy Labels") |
| """ |
|
|
| |
    y_pred = ops.convert_to_tensor(y_pred)
    # One-hot encode the integer labels to match `y_pred`'s class axis.
    y_true_one_hot = ops.one_hot(
        ops.cast(y_true, "int32"), num_classes=ops.shape(y_pred)[-1]
    )
| y_true_one_hot = ops.cast(y_true_one_hot, y_pred.dtype) |
| |
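    # Probability assigned to the true class for each sample.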
| p = ops.sum(y_pred * y_true_one_hot, axis=-1) |
|
|
| |
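    # GCE: (1 - p**q) / q; approaches CE as q -> 0 and MAE as q -> 1.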
| gce_loss = (1 - ops.power(p, q)) / q |
|
|
| return gce_loss |
|
|