ceres-solver-v1 / colmap /src /retrieval /vote_and_verify.cc
camenduru's picture
ceres-solver and colmap
7b7496d
// Copyright (c) 2022, ETH Zurich and UNC Chapel Hill.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
#include "retrieval/vote_and_verify.h"
#include <array>
#include <unordered_map>
#include "estimators/affine_transform.h"
#include "optim/ransac.h"
#include "util/logging.h"
#include "util/math.h"
namespace colmap {
namespace retrieval {
namespace {
// Affine transformation from left to right and from left to right image.
struct TwoWayTransform {
TwoWayTransform()
: A12(Eigen::Matrix2f::Zero()),
t12(Eigen::Vector2f::Zero()),
A21(Eigen::Matrix2f::Zero()),
t21(Eigen::Vector2f::Zero()) {}
TwoWayTransform(const FeatureGeometryTransform& tform) {
const float sin_angle = std::sin(tform.angle);
const float cos_angle = std::cos(tform.angle);
Eigen::Matrix2f R;
R << cos_angle, -sin_angle, sin_angle, cos_angle;
A12 = tform.scale * R;
t12 << tform.tx, tform.ty;
A21 = R.transpose() / tform.scale;
t21 = -A21 * t12;
}
Eigen::Matrix2f A12;
Eigen::Vector2f t12;
Eigen::Matrix2f A21;
Eigen::Vector2f t21;
};
// Class representing a single bin in the voting space of a similarity
// transformation. Keeps track of the mean transformation described by the bin.
class VotingBin {
public:
void Vote(const FeatureGeometryTransform& tform) {
num_votes_ += 1;
sum_tform_.scale += tform.scale;
sum_tform_.angle += tform.angle;
sum_tform_.tx += tform.tx;
sum_tform_.ty += tform.ty;
}
// Get the number of votes.
size_t GetNumVotes() const { return num_votes_; }
// Compute the mean transformation of the voting bin.
FeatureGeometryTransform GetTransformation() const {
const float inv_num_votes = 1.0f / static_cast<float>(num_votes_);
FeatureGeometryTransform tform = sum_tform_;
tform.scale *= inv_num_votes;
tform.angle *= inv_num_votes;
tform.tx *= inv_num_votes;
tform.ty *= inv_num_votes;
return tform;
}
private:
size_t num_votes_ = 0;
FeatureGeometryTransform sum_tform_;
};
// Compute the difference in scale between the two features when aligning them
// with the given transformation.
float ComputeScaleError(const FeatureGeometry& feature1,
const FeatureGeometry& feature2,
const TwoWayTransform& tform) {
const float area_transformed = feature1.GetAreaUnderTransform(tform.A21);
const float area_measured = feature2.GetArea();
if (area_transformed > area_measured) {
return area_transformed / area_measured;
} else {
return area_measured / area_transformed;
}
}
// Compute the two-way transfer error between two features.
float ComputeTransferError(const FeatureGeometry& feature1,
const FeatureGeometry& feature2,
const TwoWayTransform& tform) {
const Eigen::Vector2f xy1(feature1.x, feature1.y);
const Eigen::Vector2f xy2(feature2.x, feature2.y);
const float error1 = (xy2 - tform.A12 * xy1 - tform.t12).squaredNorm();
const float error2 = (xy1 - tform.A21 * xy2 - tform.t21).squaredNorm();
return error1 + error2;
}
// Compute inlier matches that satisfy the transfer, scale thresholds.
void ComputeInliers(const TwoWayTransform& tform,
const std::vector<FeatureGeometryMatch>& matches,
const float max_transfer_error, const float max_scale_error,
std::vector<std::pair<int, int>>* inlier_idxs) {
CHECK_GT(max_transfer_error, 0);
CHECK_GT(max_scale_error, 0);
inlier_idxs->clear();
for (size_t i = 0; i < matches.size(); ++i) {
const auto& match = matches[i];
for (size_t j = 0; j < match.geometries2.size(); ++j) {
const auto& geometry2 = match.geometries2[j];
if (ComputeScaleError(match.geometry1, geometry2, tform) <=
max_scale_error &&
ComputeTransferError(match.geometry1, geometry2, tform) <=
max_transfer_error) {
inlier_idxs->emplace_back(i, j);
}
}
}
}
// Compute effective inlier count that satisfy the transfer, scale thresholds.
size_t ComputeEffectiveInlierCount(
const TwoWayTransform& tform,
const std::vector<FeatureGeometryMatch>& matches,
const float max_transfer_error, const float max_scale_error,
const int num_bins) {
CHECK_GT(max_transfer_error, 0);
CHECK_GT(max_scale_error, 0);
CHECK_GT(num_bins, 0);
std::vector<std::pair<float, float>> inlier_coords;
inlier_coords.reserve(matches.size());
float min_x = std::numeric_limits<float>::max();
float min_y = std::numeric_limits<float>::max();
float max_x = 0;
float max_y = 0;
for (const auto& match : matches) {
for (const auto& geometry2 : match.geometries2) {
if (ComputeScaleError(match.geometry1, geometry2, tform) <=
max_scale_error &&
ComputeTransferError(match.geometry1, geometry2, tform) <=
max_transfer_error) {
inlier_coords.emplace_back(match.geometry1.x, match.geometry1.y);
min_x = std::min(min_x, match.geometry1.x);
min_y = std::min(min_y, match.geometry1.y);
max_x = std::max(max_x, match.geometry1.x);
max_y = std::max(max_y, match.geometry1.y);
break;
}
}
}
if (inlier_coords.empty()) {
return 0;
}
const float scale_x = num_bins / (max_x - min_x);
const float scale_y = num_bins / (max_y - min_y);
Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic> counter(num_bins,
num_bins);
counter.setZero();
for (const auto& coord : inlier_coords) {
const int c_x = (coord.first - min_x) * scale_x;
const int c_y = (coord.second - min_y) * scale_y;
counter(std::max(0, std::min(num_bins - 1, c_x)),
std::max(0, std::min(num_bins - 1, c_y))) = 1;
}
return counter.sum();
}
} // namespace
int VoteAndVerify(const VoteAndVerifyOptions& options,
const std::vector<FeatureGeometryMatch>& matches) {
CHECK_GT(options.num_transformations, 0);
CHECK_GT(options.num_trans_bins, 0);
CHECK_EQ(options.num_trans_bins % 2, 0);
CHECK_GT(options.num_scale_bins, 0);
CHECK_EQ(options.num_scale_bins % 2, 0);
CHECK_GT(options.num_angle_bins, 0);
CHECK_EQ(options.num_angle_bins % 2, 0);
CHECK_GT(options.max_image_size, 0);
CHECK_GT(options.min_num_votes, 0);
CHECK_GE(options.confidence, 0);
CHECK_LE(options.confidence, 1);
if (matches.size() < AffineTransformEstimator::kMinNumSamples) {
return 0;
}
const float max_trans = options.max_image_size;
const float kMaxScale = 10.0f;
const float max_log_scale = std::log2(kMaxScale);
const float trans_norm = 1.0f / (2.0f * max_trans);
const float scale_norm = 1.0f / (2.0f * max_log_scale);
const float angle_norm = 1.0f / (2.0f * M_PI);
//////////////////////////////////////////////////////////////////////////////
// Fill the multi-resolution voting histogram.
//////////////////////////////////////////////////////////////////////////////
const int kNumLevels = 6;
std::array<std::unordered_map<size_t, VotingBin>, kNumLevels> bins;
std::unordered_map<size_t, Eigen::Vector4i> coords;
for (const auto& match : matches) {
for (const auto& geometry2 : match.geometries2) {
const auto T =
FeatureGeometry::TransformFromMatch(match.geometry1, geometry2);
if (std::abs(T.tx) > max_trans || std::abs(T.ty) > max_trans) {
continue;
}
const float log_scale = std::log2(T.scale);
if (std::abs(log_scale) > max_log_scale) {
continue;
}
const float x = (T.tx + max_trans) * trans_norm;
const float y = (T.ty + max_trans) * trans_norm;
const float s = (log_scale + max_log_scale) * scale_norm;
const float a = (T.angle + M_PI) * angle_norm;
int n_x = std::min(static_cast<int>(x * options.num_trans_bins),
static_cast<int>(options.num_trans_bins - 1));
int n_y = std::min(static_cast<int>(y * options.num_trans_bins),
static_cast<int>(options.num_trans_bins - 1));
int n_s = std::min(static_cast<int>(s * options.num_scale_bins),
static_cast<int>(options.num_scale_bins - 1));
int n_a = std::min(static_cast<int>(a * options.num_angle_bins),
static_cast<int>(options.num_angle_bins - 1));
for (int level = 0; level < kNumLevels; ++level) {
const uint64_t index =
n_a + options.num_angle_bins *
(n_s + options.num_scale_bins *
(n_x + options.num_trans_bins * n_y));
if (level == 0) {
coords[index] = Eigen::Vector4i(n_a, n_s, n_x, n_y);
}
bins[level][index].Vote(T);
n_x >>= 1;
n_y >>= 1;
n_s >>= 1;
n_a >>= 1;
}
}
}
//////////////////////////////////////////////////////////////////////////////
// Compute the multi-resolution scores for all occupied bins.
//////////////////////////////////////////////////////////////////////////////
std::vector<std::pair<int, float>> bin_scores;
for (const auto& bin : bins[0]) {
if (bin.second.GetNumVotes() >=
static_cast<size_t>(options.min_num_votes)) {
const auto coord = coords.at(bin.first);
int n_a = coord(0);
int n_s = coord(1);
int n_x = coord(2);
int n_y = coord(3);
float score = bin.second.GetNumVotes();
float level_weight = 0.5f;
for (int level = 1; level < kNumLevels; ++level) {
n_x >>= 1;
n_y >>= 1;
n_s >>= 1;
n_a >>= 1;
const uint64_t index =
n_a + options.num_angle_bins *
(n_s + options.num_scale_bins *
(n_x + options.num_trans_bins * n_y));
score += bins[level][index].GetNumVotes() * level_weight;
level_weight *= 0.5f;
}
bin_scores.emplace_back(bin.first, score);
}
}
//////////////////////////////////////////////////////////////////////////////
// Extract the top transformations.
//////////////////////////////////////////////////////////////////////////////
const size_t num_transformations = std::min(
static_cast<size_t>(options.num_transformations), bin_scores.size());
std::partial_sort(bin_scores.begin(),
bin_scores.begin() + num_transformations, bin_scores.end(),
[](const std::pair<int, float>& score1,
const std::pair<int, float>& score2) {
return score1.second > score2.second;
});
//////////////////////////////////////////////////////////////////////////////
// Verify the top transformations.
//////////////////////////////////////////////////////////////////////////////
size_t max_num_trials = std::numeric_limits<size_t>::max();
size_t best_num_inliers = 0;
TwoWayTransform best_tform;
std::vector<std::pair<int, int>> inlier_idxs;
std::vector<Eigen::Vector2d> inlier_points1;
std::vector<Eigen::Vector2d> inlier_points2;
for (size_t i = 0; i < num_transformations && i < max_num_trials; ++i) {
const auto& bin = bins[0].at(bin_scores.at(i).first);
const auto tform = TwoWayTransform(bin.GetTransformation());
ComputeInliers(tform, matches, options.max_transfer_error,
options.max_scale_error, &inlier_idxs);
if (inlier_idxs.size() < best_num_inliers ||
inlier_idxs.size() < AffineTransformEstimator::kMinNumSamples) {
continue;
}
best_num_inliers = inlier_idxs.size();
best_tform = tform;
if (best_num_inliers == matches.size()) {
break;
}
// Collect matching inlier points.
inlier_points1.resize(inlier_idxs.size());
inlier_points2.resize(inlier_idxs.size());
for (size_t j = 0; j < inlier_idxs.size(); ++j) {
const auto& inlier_idx = inlier_idxs[j];
const auto& match = matches.at(inlier_idx.first);
const auto& geometry1 = match.geometry1;
const auto& geometry2 = match.geometries2.at(inlier_idx.second);
inlier_points1[j] = Eigen::Vector2d(geometry1.x, geometry1.y);
inlier_points2[j] = Eigen::Vector2d(geometry2.x, geometry2.y);
}
// Local optimization on matching inlier points.
const Eigen::Matrix<double, 2, 3> A =
AffineTransformEstimator::Estimate(inlier_points1, inlier_points2)[0];
Eigen::Matrix3d A_homogeneous = Eigen::Matrix3d::Identity();
A_homogeneous.topRows<2>() = A;
const Eigen::Matrix<double, 2, 3> inv_A =
A_homogeneous.inverse().topRows<2>();
TwoWayTransform local_tform;
local_tform.A12 = A.leftCols<2>().cast<float>();
local_tform.t12 = A.rightCols<1>().cast<float>();
local_tform.A21 = inv_A.leftCols<2>().cast<float>();
local_tform.t21 = inv_A.rightCols<1>().cast<float>();
ComputeInliers(local_tform, matches, options.max_transfer_error,
options.max_scale_error, &inlier_idxs);
if (inlier_idxs.size() > best_num_inliers) {
best_num_inliers = inlier_idxs.size();
best_tform = local_tform;
if (best_num_inliers == matches.size()) {
break;
}
}
max_num_trials = RANSAC<AffineTransformEstimator>::ComputeNumTrials(
best_num_inliers, matches.size(), options.confidence,
/* num_trials_multiplier = */ 3.0);
}
if (best_num_inliers == 0) {
return 0;
}
const size_t kNumBins = 64;
return ComputeEffectiveInlierCount(best_tform, matches,
options.max_transfer_error,
options.max_scale_error, kNumBins);
}
} // namespace retrieval
} // namespace colmap