#ifndef CAFFE_MULTINOMIAL_LOGISTIC_LOSS_LAYER_HPP_ #define CAFFE_MULTINOMIAL_LOGISTIC_LOSS_LAYER_HPP_ #include #include "caffe/blob.hpp" #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/layers/loss_layer.hpp" namespace caffe { /** * @brief Computes the multinomial logistic loss for a one-of-many * classification task, directly taking a predicted probability * distribution as input. * * When predictions are not already a probability distribution, you should * instead use the SoftmaxWithLossLayer, which maps predictions to a * distribution using the SoftmaxLayer, before computing the multinomial * logistic loss. The SoftmaxWithLossLayer should be preferred over separate * SoftmaxLayer + MultinomialLogisticLossLayer * as its gradient computation is more numerically stable. * * @param bottom input Blob vector (length 2) * -# @f$ (N \times C \times H \times W) @f$ * the predictions @f$ \hat{p} @f$, a Blob with values in * @f$ [0, 1] @f$ indicating the predicted probability of each of the * @f$ K = CHW @f$ classes. Each prediction vector @f$ \hat{p}_n @f$ * should sum to 1 as in a probability distribution: @f$ * \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$. * -# @f$ (N \times 1 \times 1 \times 1) @f$ * the labels @f$ l @f$, an integer-valued Blob with values * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ * indicating the correct class label among the @f$ K @f$ classes * @param top output Blob vector (length 1) * -# @f$ (1 \times 1 \times 1 \times 1) @f$ * the computed multinomial logistic loss: @f$ E = * \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n}) * @f$ */ template class MultinomialLogisticLossLayer : public LossLayer { public: explicit MultinomialLogisticLossLayer(const LayerParameter& param) : LossLayer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "MultinomialLogisticLoss"; } protected: /// @copydoc MultinomialLogisticLossLayer virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); /** * @brief Computes the multinomial logistic loss error gradient w.r.t. the * predictions. * * Gradients cannot be computed with respect to the label inputs (bottom[1]), * so this method ignores bottom[1] and requires !propagate_down[1], crashing * if propagate_down[1] is set. * * @param top output Blob vector (length 1), providing the error gradient with * respect to the outputs * -# @f$ (1 \times 1 \times 1 \times 1) @f$ * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, * as @f$ \lambda @f$ is the coefficient of this layer's output * @f$\ell_i@f$ in the overall Net loss * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. * (*Assuming that this top Blob is not used as a bottom (input) by any * other layer of the Net.) * @param propagate_down see Layer::Backward. * propagate_down[1] must be false as we can't compute gradients with * respect to the labels. * @param bottom input Blob vector (length 2) * -# @f$ (N \times C \times H \times W) @f$ * the predictions @f$ \hat{p} @f$; Backward computes diff * @f$ \frac{\partial E}{\partial \hat{p}} @f$ * -# @f$ (N \times 1 \times 1 \times 1) @f$ * the labels -- ignored as we can't compute their error gradients */ virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); }; } // namespace caffe #endif // CAFFE_MULTINOMIAL_LOGISTIC_LOSS_LAYER_HPP_