| | #ifndef CAFFE_TEST_GRADIENT_CHECK_UTIL_H_ |
| | #define CAFFE_TEST_GRADIENT_CHECK_UTIL_H_ |
| |
|
| | #include <glog/logging.h> |
| | #include <gtest/gtest.h> |
| |
|
| | #include <algorithm> |
| | #include <cmath> |
| | #include <vector> |
| |
|
| | #include "caffe/layer.hpp" |
| | #include "caffe/net.hpp" |
| |
|
| | namespace caffe { |
| |
|
| | |
| | |
| | template <typename Dtype> |
| | class GradientChecker { |
| | public: |
| | |
| | |
| | |
| | GradientChecker(const Dtype stepsize, const Dtype threshold, |
| | const unsigned int seed = 1701, const Dtype kink = 0., |
| | const Dtype kink_range = -1) |
| | : stepsize_(stepsize), threshold_(threshold), seed_(seed), |
| | kink_(kink), kink_range_(kink_range) {} |
| | |
| | |
| | |
| | |
| | void CheckGradient(Layer<Dtype>* layer, const vector<Blob<Dtype>*>& bottom, |
| | const vector<Blob<Dtype>*>& top, int check_bottom = -1) { |
| | layer->SetUp(bottom, top); |
| | CheckGradientSingle(layer, bottom, top, check_bottom, -1, -1); |
| | } |
| | void CheckGradientExhaustive(Layer<Dtype>* layer, |
| | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top, |
| | int check_bottom = -1); |
| |
|
| | |
| | |
| | |
| | void CheckGradientEltwise(Layer<Dtype>* layer, |
| | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| |
|
| | |
| | |
| | |
| | |
| | void CheckGradientSingle(Layer<Dtype>* layer, |
| | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top, |
| | int check_bottom, int top_id, int top_data_id, bool element_wise = false); |
| |
|
| | |
| | |
| | |
| | |
| | void CheckGradientNet(const Net<Dtype>& net, |
| | const vector<Blob<Dtype>*>& input); |
| |
|
| | protected: |
| | Dtype GetObjAndGradient(const Layer<Dtype>& layer, |
| | const vector<Blob<Dtype>*>& top, int top_id = -1, int top_data_id = -1); |
| | Dtype stepsize_; |
| | Dtype threshold_; |
| | unsigned int seed_; |
| | Dtype kink_; |
| | Dtype kink_range_; |
| | }; |
| |
|
| |
|
| | template <typename Dtype> |
| | void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer, |
| | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top, |
| | int check_bottom, int top_id, int top_data_id, bool element_wise) { |
| | if (element_wise) { |
| | CHECK_EQ(0, layer->blobs().size()); |
| | CHECK_LE(0, top_id); |
| | CHECK_LE(0, top_data_id); |
| | const int top_count = top[top_id]->count(); |
| | for (int blob_id = 0; blob_id < bottom.size(); ++blob_id) { |
| | CHECK_EQ(top_count, bottom[blob_id]->count()); |
| | } |
| | } |
| | |
| | |
| | vector<Blob<Dtype>*> blobs_to_check; |
| | vector<bool> propagate_down(bottom.size(), check_bottom == -1); |
| | for (int i = 0; i < layer->blobs().size(); ++i) { |
| | Blob<Dtype>* blob = layer->blobs()[i].get(); |
| | caffe_set(blob->count(), static_cast<Dtype>(0), blob->mutable_cpu_diff()); |
| | blobs_to_check.push_back(blob); |
| | } |
| | if (check_bottom == -1) { |
| | for (int i = 0; i < bottom.size(); ++i) { |
| | blobs_to_check.push_back(bottom[i]); |
| | } |
| | } else if (check_bottom >= 0) { |
| | CHECK_LT(check_bottom, bottom.size()); |
| | blobs_to_check.push_back(bottom[check_bottom]); |
| | propagate_down[check_bottom] = true; |
| | } |
| | CHECK_GT(blobs_to_check.size(), 0) << "No blobs to check."; |
| | |
| | Caffe::set_random_seed(seed_); |
| | |
| | |
| | layer->Forward(bottom, top); |
| | |
| | GetObjAndGradient(*layer, top, top_id, top_data_id); |
| | layer->Backward(top, propagate_down, bottom); |
| | |
| | vector<shared_ptr<Blob<Dtype> > > |
| | computed_gradient_blobs(blobs_to_check.size()); |
| | for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) { |
| | Blob<Dtype>* current_blob = blobs_to_check[blob_id]; |
| | computed_gradient_blobs[blob_id].reset(new Blob<Dtype>()); |
| | computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob); |
| | const int count = blobs_to_check[blob_id]->count(); |
| | const Dtype* diff = blobs_to_check[blob_id]->cpu_diff(); |
| | Dtype* computed_gradients = |
| | computed_gradient_blobs[blob_id]->mutable_cpu_data(); |
| | caffe_copy(count, diff, computed_gradients); |
| | } |
| | |
| | |
| | |
| | for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) { |
| | Blob<Dtype>* current_blob = blobs_to_check[blob_id]; |
| | const Dtype* computed_gradients = |
| | computed_gradient_blobs[blob_id]->cpu_data(); |
| | |
| | |
| | for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) { |
| | |
| | |
| | |
| | |
| | |
| | Dtype estimated_gradient = 0; |
| | Dtype positive_objective = 0; |
| | Dtype negative_objective = 0; |
| | if (!element_wise || (feat_id == top_data_id)) { |
| | |
| | |
| | current_blob->mutable_cpu_data()[feat_id] += stepsize_; |
| | Caffe::set_random_seed(seed_); |
| | layer->Forward(bottom, top); |
| | positive_objective = |
| | GetObjAndGradient(*layer, top, top_id, top_data_id); |
| | |
| | current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2; |
| | Caffe::set_random_seed(seed_); |
| | layer->Forward(bottom, top); |
| | negative_objective = |
| | GetObjAndGradient(*layer, top, top_id, top_data_id); |
| | |
| | current_blob->mutable_cpu_data()[feat_id] += stepsize_; |
| | estimated_gradient = (positive_objective - negative_objective) / |
| | stepsize_ / 2.; |
| | } |
| | Dtype computed_gradient = computed_gradients[feat_id]; |
| | Dtype feature = current_blob->cpu_data()[feat_id]; |
| | |
| | |
| | if (kink_ - kink_range_ > fabs(feature) |
| | || fabs(feature) > kink_ + kink_range_) { |
| | |
| | |
| | Dtype scale = std::max<Dtype>( |
| | std::max(fabs(computed_gradient), fabs(estimated_gradient)), |
| | Dtype(1.)); |
| | EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale) |
| | << "debug: (top_id, top_data_id, blob_id, feat_id)=" |
| | << top_id << "," << top_data_id << "," << blob_id << "," << feat_id |
| | << "; feat = " << feature |
| | << "; objective+ = " << positive_objective |
| | << "; objective- = " << negative_objective; |
| | } |
| | |
| | |
| | |
| | } |
| | } |
| | } |
| |
|
| | template <typename Dtype> |
| | void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>* layer, |
| | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top, |
| | int check_bottom) { |
| | layer->SetUp(bottom, top); |
| | CHECK_GT(top.size(), 0) << "Exhaustive mode requires at least one top blob."; |
| | |
| | for (int i = 0; i < top.size(); ++i) { |
| | |
| | for (int j = 0; j < top[i]->count(); ++j) { |
| | |
| | CheckGradientSingle(layer, bottom, top, check_bottom, i, j); |
| | } |
| | } |
| | } |
| |
|
| | template <typename Dtype> |
| | void GradientChecker<Dtype>::CheckGradientEltwise(Layer<Dtype>* layer, |
| | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { |
| | layer->SetUp(bottom, top); |
| | CHECK_GT(top.size(), 0) << "Eltwise mode requires at least one top blob."; |
| | const int check_bottom = -1; |
| | const bool element_wise = true; |
| | for (int i = 0; i < top.size(); ++i) { |
| | for (int j = 0; j < top[i]->count(); ++j) { |
| | CheckGradientSingle(layer, bottom, top, check_bottom, i, j, element_wise); |
| | } |
| | } |
| | } |
| |
|
| | template <typename Dtype> |
| | void GradientChecker<Dtype>::CheckGradientNet( |
| | const Net<Dtype>& net, const vector<Blob<Dtype>*>& input) { |
| | const vector<shared_ptr<Layer<Dtype> > >& layers = net.layers(); |
| | vector<vector<Blob<Dtype>*> >& bottom_vecs = net.bottom_vecs(); |
| | vector<vector<Blob<Dtype>*> >& top_vecs = net.top_vecs(); |
| | for (int i = 0; i < layers.size(); ++i) { |
| | net.Forward(input); |
| | LOG(ERROR) << "Checking gradient for " << layers[i]->layer_param().name(); |
| | CheckGradientExhaustive(*(layers[i].get()), bottom_vecs[i], top_vecs[i]); |
| | } |
| | } |
| |
|
| | template <typename Dtype> |
| | Dtype GradientChecker<Dtype>::GetObjAndGradient(const Layer<Dtype>& layer, |
| | const vector<Blob<Dtype>*>& top, int top_id, int top_data_id) { |
| | Dtype loss = 0; |
| | if (top_id < 0) { |
| | |
| | for (int i = 0; i < top.size(); ++i) { |
| | Blob<Dtype>* top_blob = top[i]; |
| | const Dtype* top_blob_data = top_blob->cpu_data(); |
| | Dtype* top_blob_diff = top_blob->mutable_cpu_diff(); |
| | int count = top_blob->count(); |
| | for (int j = 0; j < count; ++j) { |
| | loss += top_blob_data[j] * top_blob_data[j]; |
| | } |
| | |
| | caffe_copy(top_blob->count(), top_blob_data, top_blob_diff); |
| | } |
| | loss /= 2.; |
| | } else { |
| | |
| | for (int i = 0; i < top.size(); ++i) { |
| | Blob<Dtype>* top_blob = top[i]; |
| | Dtype* top_blob_diff = top_blob->mutable_cpu_diff(); |
| | caffe_set(top_blob->count(), Dtype(0), top_blob_diff); |
| | } |
| | const Dtype loss_weight = 2; |
| | loss = top[top_id]->cpu_data()[top_data_id] * loss_weight; |
| | top[top_id]->mutable_cpu_diff()[top_data_id] = loss_weight; |
| | } |
| | return loss; |
| | } |
| |
|
| | } |
| |
|
| | #endif |
| |
|