ceres-solver / include /colmap /base /reconstruction.h
camenduru's picture
ceres-solver and colmap
2b5a2b6
// Copyright (c) 2022, ETH Zurich and UNC Chapel Hill.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
#ifndef COLMAP_SRC_BASE_RECONSTRUCTION_H_
#define COLMAP_SRC_BASE_RECONSTRUCTION_H_
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <Eigen/Core>
#include "base/camera.h"
#include "base/database.h"
#include "base/image.h"
#include "base/point2d.h"
#include "base/point3d.h"
#include "base/similarity_transform.h"
#include "base/track.h"
#include "estimators/similarity_transform.h"
#include "optim/loransac.h"
#include "util/alignment.h"
#include "util/types.h"
namespace colmap {
struct PlyPoint;
struct RANSACOptions;
class DatabaseCache;
class CorrespondenceGraph;
// Reconstruction class holds all information about a single reconstructed
// model. It is used by the mapping and bundle adjustment classes and can be
// written to and read from disk.
class Reconstruction {
public:
struct ImagePairStat {
// The number of triangulated correspondences between two images.
size_t num_tri_corrs = 0;
// The number of total correspondences/matches between two images.
size_t num_total_corrs = 0;
};
Reconstruction();
// Get number of objects.
inline size_t NumCameras() const;
inline size_t NumImages() const;
inline size_t NumRegImages() const;
inline size_t NumPoints3D() const;
inline size_t NumImagePairs() const;
// Get const objects.
inline const class Camera& Camera(const camera_t camera_id) const;
inline const class Image& Image(const image_t image_id) const;
inline const class Point3D& Point3D(const point3D_t point3D_id) const;
inline const ImagePairStat& ImagePair(const image_pair_t pair_id) const;
inline ImagePairStat& ImagePair(const image_t image_id1,
const image_t image_id2);
// Get mutable objects.
inline class Camera& Camera(const camera_t camera_id);
inline class Image& Image(const image_t image_id);
inline class Point3D& Point3D(const point3D_t point3D_id);
inline ImagePairStat& ImagePair(const image_pair_t pair_id);
inline const ImagePairStat& ImagePair(const image_t image_id1,
const image_t image_id2) const;
// Get reference to all objects.
inline const EIGEN_STL_UMAP(camera_t, class Camera) & Cameras() const;
inline const EIGEN_STL_UMAP(image_t, class Image) & Images() const;
inline const std::vector<image_t>& RegImageIds() const;
inline const EIGEN_STL_UMAP(point3D_t, class Point3D) & Points3D() const;
inline const std::unordered_map<image_pair_t, ImagePairStat>& ImagePairs()
const;
// Identifiers of all 3D points.
std::unordered_set<point3D_t> Point3DIds() const;
// Check whether specific object exists.
inline bool ExistsCamera(const camera_t camera_id) const;
inline bool ExistsImage(const image_t image_id) const;
inline bool ExistsPoint3D(const point3D_t point3D_id) const;
inline bool ExistsImagePair(const image_pair_t pair_id) const;
// Load data from given `DatabaseCache`.
void Load(const DatabaseCache& database_cache);
// Setup all relevant data structures before reconstruction. Note the
// correspondence graph object must live until `TearDown` is called.
void SetUp(const CorrespondenceGraph* correspondence_graph);
// Finalize the Reconstruction after the reconstruction has finished.
//
// Once a scene has been finalized, it cannot be used for reconstruction.
//
// This removes all not yet registered images and unused cameras, in order to
// save memory.
void TearDown();
// Add new camera. There is only one camera per image, while multiple images
// might be taken by the same camera.
void AddCamera(class Camera camera);
// Add new image.
void AddImage(class Image image);
// Add new 3D object, and return its unique ID.
point3D_t AddPoint3D(
const Eigen::Vector3d& xyz, Track track,
const Eigen::Vector3ub& color = Eigen::Vector3ub::Zero());
// Add observation to existing 3D point.
void AddObservation(const point3D_t point3D_id, const TrackElement& track_el);
// Merge two 3D points and return new identifier of new 3D point.
// The location of the merged 3D point is a weighted average of the two
// original 3D point's locations according to their track lengths.
point3D_t MergePoints3D(const point3D_t point3D_id1,
const point3D_t point3D_id2);
// Delete a 3D point, and all its references in the observed images.
void DeletePoint3D(const point3D_t point3D_id);
// Delete one observation from an image and the corresponding 3D point.
// Note that this deletes the entire 3D point, if the track has two elements
// prior to calling this method.
void DeleteObservation(const image_t image_id, const point2D_t point2D_idx);
// Delete all 2D points of all images and all 3D points.
void DeleteAllPoints2DAndPoints3D();
// Register an existing image.
void RegisterImage(const image_t image_id);
// De-register an existing image, and all its references.
void DeRegisterImage(const image_t image_id);
// Check if image is registered.
inline bool IsImageRegistered(const image_t image_id) const;
// Normalize scene by scaling and translation to avoid degenerate
// visualization after bundle adjustment and to improve numerical
// stability of algorithms.
//
// Translates scene such that the mean of the camera centers or point
// locations are at the origin of the coordinate system.
//
// Scales scene such that the minimum and maximum camera centers are at the
// given `extent`, whereas `p0` and `p1` determine the minimum and
// maximum percentiles of the camera centers considered.
void Normalize(const double extent = 10.0, const double p0 = 0.1,
const double p1 = 0.9, const bool use_images = true);
// Compute the centroid of the 3D points
Eigen::Vector3d ComputeCentroid(const double p0 = 0.1,
const double p1 = 0.9) const;
// Compute the bounding box corners of the 3D points
std::pair<Eigen::Vector3d, Eigen::Vector3d> ComputeBoundingBox(
const double p0 = 0.0, const double p1 = 1.0) const;
// Apply the 3D similarity transformation to all images and points.
void Transform(const SimilarityTransform3& tform);
// Creates a cropped reconstruction using the input bounds as corner points
// of the bounding box containing the included 3D points of the new
// reconstruction. Only the cameras and images of the included points are
// registered.
Reconstruction Crop(
const std::pair<Eigen::Vector3d, Eigen::Vector3d>& bbox) const;
// Merge the given reconstruction into this reconstruction by registering the
// images registered in the given but not in this reconstruction and by
// merging the two clouds and their tracks. The coordinate frames of the two
// reconstructions are aligned using the projection centers of common
// registered images. Return true if the two reconstructions could be merged.
bool Merge(const Reconstruction& reconstruction,
const double max_reproj_error);
// Align the given reconstruction with a set of pre-defined camera positions.
// Assuming that locations[i] gives the 3D coordinates of the center
// of projection of the image with name image_names[i].
template <bool kEstimateScale = true>
bool Align(const std::vector<std::string>& image_names,
const std::vector<Eigen::Vector3d>& locations,
const int min_common_images,
SimilarityTransform3* tform = nullptr);
// Robust alignment using RANSAC.
template <bool kEstimateScale = true>
bool AlignRobust(const std::vector<std::string>& image_names,
const std::vector<Eigen::Vector3d>& locations,
const int min_common_images,
const RANSACOptions& ransac_options,
SimilarityTransform3* tform = nullptr);
// Find specific image by name. Note that this uses linear search.
const class Image* FindImageWithName(const std::string& name) const;
// Find images that are both present in this and the given reconstruction.
std::vector<image_t> FindCommonRegImageIds(
const Reconstruction& reconstruction) const;
// Update the image identifiers to match the ones in the database by matching
// the names of the images.
void TranscribeImageIdsToDatabase(const Database& database);
// Filter 3D points with large reprojection error, negative depth, or
// insufficient triangulation angle.
//
// @param max_reproj_error The maximum reprojection error.
// @param min_tri_angle The minimum triangulation angle.
// @param point3D_ids The points to be filtered.
//
// @return The number of filtered observations.
size_t FilterPoints3D(const double max_reproj_error,
const double min_tri_angle,
const std::unordered_set<point3D_t>& point3D_ids);
size_t FilterPoints3DInImages(const double max_reproj_error,
const double min_tri_angle,
const std::unordered_set<image_t>& image_ids);
size_t FilterAllPoints3D(const double max_reproj_error,
const double min_tri_angle);
// Filter observations that have negative depth.
//
// @return The number of filtered observations.
size_t FilterObservationsWithNegativeDepth();
// Filter images without observations or bogus camera parameters.
//
// @return The identifiers of the filtered images.
std::vector<image_t> FilterImages(const double min_focal_length_ratio,
const double max_focal_length_ratio,
const double max_extra_param);
// Compute statistics for scene.
size_t ComputeNumObservations() const;
double ComputeMeanTrackLength() const;
double ComputeMeanObservationsPerRegImage() const;
double ComputeMeanReprojectionError() const;
// Read data from text or binary file. Prefer binary data if it exists.
void Read(const std::string& path);
void Write(const std::string& path) const;
// Read data from binary/text file.
void ReadText(const std::string& path);
void ReadBinary(const std::string& path);
// Write data from binary/text file.
void WriteText(const std::string& path) const;
void WriteBinary(const std::string& path) const;
// Convert 3D points in reconstruction to PLY point cloud.
std::vector<PlyPoint> ConvertToPLY() const;
// Import from other data formats. Note that these import functions are
// only intended for visualization of data and usable for reconstruction.
void ImportPLY(const std::string& path);
void ImportPLY(const std::vector<PlyPoint>& ply_points);
// Export to other data formats.
// Exports in NVM format http://ccwu.me/vsfm/doc.html#nvm. Only supports
// SIMPLE_RADIAL camera model when exporting distortion parameters. When
// skip_distortion == true it supports all camera models with the caveat that
// it's using the mean focal length which will be inaccurate for camera models
// with two focal lengths and distortion.
bool ExportNVM(const std::string& path, bool skip_distortion = false) const;
// Exports in CAM format which is a simple text file that contains pose
// information and camera intrinsics for each image and exports one file per
// image; it does not include information on the 3D points. The format is as
// follows (2 lines of text with space separated numbers):
// <Tvec; 3 values> <Rotation matrix in row-major format; 9 values>
// <focal_length> <k1> <k2> 1.0 <principal point X> <principal point Y>
// Note that focal length is relative to the image max(width, height),
// and principal points x and y are relative to width and height respectively.
//
// Only supports SIMPLE_RADIAL and RADIAL camera models when exporting
// distortion parameters. When skip_distortion == true it supports all camera
// models with the caveat that it's using the mean focal length which will be
// inaccurate for camera models with two focal lengths and distortion.
bool ExportCam(const std::string& path, bool skip_distortion = false) const;
// Exports in Recon3D format which consists of three text files with the
// following format and content:
// 1) imagemap_0.txt: a list of image numeric IDs with one entry per line.
// 2) urd-images.txt: A list of images with one entry per line as:
// <image file name> <width> <height>
// 3) synth_0.out: Contains information for image poses, camera intrinsics,
// and 3D points as:
// <N; num images> <M; num points>
// <N lines of image entries>
// <M lines of point entries>
//
// Each image entry consists of 5 lines as:
// <focal length> <k1> <k2>
// <Rotation matrix; 3x3 array>
// <Tvec; 3 values>
// Note that the focal length is scaled by 1 / max(width, height)
//
// Each point entry consists of 3 lines as:
// <point x, y, z coordinates>
// <point RGB color>
// <K; num track elements> <Track Element 1> ... <Track Element K>
//
// Each track elemenet is a sequence of 5 values as:
// <image ID> <2D point ID> -1.0 <X> <Y>
// Note that the 2D point coordinates are centered around the principal
// point and scaled by 1 / max(width, height).
//
// When skip_distortion == true it supports all camera models with the
// caveat that it's using the mean focal length which will be inaccurate
// for camera models with two focal lengths and distortion.
bool ExportRecon3D(const std::string& path,
bool skip_distortion = false) const;
// Exports in Bundler format https://www.cs.cornell.edu/~snavely/bundler/.
// Supports SIMPLE_PINHOLE, PINHOLE, SIMPLE_RADIAL and RADIAL camera models
// when exporting distortion parameters. When skip_distortion == true it
// supports all camera models with the caveat that it's using the mean focal
// length which will be inaccurate for camera models with two focal lengths
// and distortion.
bool ExportBundler(const std::string& path, const std::string& list_path,
bool skip_distortion = false) const;
// Exports 3D points only in PLY format.
void ExportPLY(const std::string& path) const;
// Exports in VRML format https://en.wikipedia.org/wiki/VRML.
void ExportVRML(const std::string& images_path,
const std::string& points3D_path, const double image_scale,
const Eigen::Vector3d& image_rgb) const;
// Extract colors for 3D points of given image. Colors will be extracted
// only for 3D points which are completely black.
//
// @param image_id Identifier of the image for which to extract colors.
// @param path Absolute or relative path to root folder of image.
// The image path is determined by concatenating the
// root path and the name of the image.
//
// @return True if image could be read at given path.
bool ExtractColorsForImage(const image_t image_id, const std::string& path);
// Extract colors for all 3D points by computing the mean color of all images.
//
// @param path Absolute or relative path to root folder of image.
// The image path is determined by concatenating the
// root path and the name of the image.
void ExtractColorsForAllImages(const std::string& path);
// Create all image sub-directories in the given path.
void CreateImageDirs(const std::string& path) const;
private:
size_t FilterPoints3DWithSmallTriangulationAngle(
const double min_tri_angle,
const std::unordered_set<point3D_t>& point3D_ids);
size_t FilterPoints3DWithLargeReprojectionError(
const double max_reproj_error,
const std::unordered_set<point3D_t>& point3D_ids);
std::tuple<Eigen::Vector3d, Eigen::Vector3d, Eigen::Vector3d>
ComputeBoundsAndCentroid(const double p0, const double p1,
const bool use_images) const;
void ReadCamerasText(const std::string& path);
void ReadImagesText(const std::string& path);
void ReadPoints3DText(const std::string& path);
void ReadCamerasBinary(const std::string& path);
void ReadImagesBinary(const std::string& path);
void ReadPoints3DBinary(const std::string& path);
void WriteCamerasText(const std::string& path) const;
void WriteImagesText(const std::string& path) const;
void WritePoints3DText(const std::string& path) const;
void WriteCamerasBinary(const std::string& path) const;
void WriteImagesBinary(const std::string& path) const;
void WritePoints3DBinary(const std::string& path) const;
void SetObservationAsTriangulated(const image_t image_id,
const point2D_t point2D_idx,
const bool is_continued_point3D);
void ResetTriObservations(const image_t image_id, const point2D_t point2D_idx,
const bool is_deleted_point3D);
const CorrespondenceGraph* correspondence_graph_;
EIGEN_STL_UMAP(camera_t, class Camera) cameras_;
EIGEN_STL_UMAP(image_t, class Image) images_;
EIGEN_STL_UMAP(point3D_t, class Point3D) points3D_;
std::unordered_map<image_pair_t, ImagePairStat> image_pair_stats_;
// { image_id, ... } where `images_.at(image_id).registered == true`.
std::vector<image_t> reg_image_ids_;
// Total number of added 3D points, used to generate unique identifiers.
point3D_t num_added_points3D_;
};
////////////////////////////////////////////////////////////////////////////////
// Implementation
////////////////////////////////////////////////////////////////////////////////
size_t Reconstruction::NumCameras() const { return cameras_.size(); }
size_t Reconstruction::NumImages() const { return images_.size(); }
size_t Reconstruction::NumRegImages() const { return reg_image_ids_.size(); }
size_t Reconstruction::NumPoints3D() const { return points3D_.size(); }
size_t Reconstruction::NumImagePairs() const {
return image_pair_stats_.size();
}
const class Camera& Reconstruction::Camera(const camera_t camera_id) const {
return cameras_.at(camera_id);
}
const class Image& Reconstruction::Image(const image_t image_id) const {
return images_.at(image_id);
}
const class Point3D& Reconstruction::Point3D(const point3D_t point3D_id) const {
return points3D_.at(point3D_id);
}
const Reconstruction::ImagePairStat& Reconstruction::ImagePair(
const image_pair_t pair_id) const {
return image_pair_stats_.at(pair_id);
}
const Reconstruction::ImagePairStat& Reconstruction::ImagePair(
const image_t image_id1, const image_t image_id2) const {
const auto pair_id = Database::ImagePairToPairId(image_id1, image_id2);
return image_pair_stats_.at(pair_id);
}
class Camera& Reconstruction::Camera(const camera_t camera_id) {
return cameras_.at(camera_id);
}
class Image& Reconstruction::Image(const image_t image_id) {
return images_.at(image_id);
}
class Point3D& Reconstruction::Point3D(const point3D_t point3D_id) {
return points3D_.at(point3D_id);
}
Reconstruction::ImagePairStat& Reconstruction::ImagePair(
const image_pair_t pair_id) {
return image_pair_stats_.at(pair_id);
}
Reconstruction::ImagePairStat& Reconstruction::ImagePair(
const image_t image_id1, const image_t image_id2) {
const auto pair_id = Database::ImagePairToPairId(image_id1, image_id2);
return image_pair_stats_.at(pair_id);
}
const EIGEN_STL_UMAP(camera_t, Camera) & Reconstruction::Cameras() const {
return cameras_;
}
const EIGEN_STL_UMAP(image_t, class Image) & Reconstruction::Images() const {
return images_;
}
const std::vector<image_t>& Reconstruction::RegImageIds() const {
return reg_image_ids_;
}
const EIGEN_STL_UMAP(point3D_t, Point3D) & Reconstruction::Points3D() const {
return points3D_;
}
const std::unordered_map<image_pair_t, Reconstruction::ImagePairStat>&
Reconstruction::ImagePairs() const {
return image_pair_stats_;
}
bool Reconstruction::ExistsCamera(const camera_t camera_id) const {
return cameras_.find(camera_id) != cameras_.end();
}
bool Reconstruction::ExistsImage(const image_t image_id) const {
return images_.find(image_id) != images_.end();
}
bool Reconstruction::ExistsPoint3D(const point3D_t point3D_id) const {
return points3D_.find(point3D_id) != points3D_.end();
}
bool Reconstruction::ExistsImagePair(const image_pair_t pair_id) const {
return image_pair_stats_.find(pair_id) != image_pair_stats_.end();
}
bool Reconstruction::IsImageRegistered(const image_t image_id) const {
return Image(image_id).IsRegistered();
}
template <bool kEstimateScale>
bool Reconstruction::Align(const std::vector<std::string>& image_names,
const std::vector<Eigen::Vector3d>& locations,
const int min_common_images,
SimilarityTransform3* tform) {
CHECK_GE(min_common_images, 3);
CHECK_EQ(image_names.size(), locations.size());
// Find out which images are contained in the reconstruction and get the
// positions of their camera centers.
std::unordered_set<image_t> common_image_ids;
std::vector<Eigen::Vector3d> src;
std::vector<Eigen::Vector3d> dst;
for (size_t i = 0; i < image_names.size(); ++i) {
const class Image* image = FindImageWithName(image_names[i]);
if (image == nullptr) {
continue;
}
if (!IsImageRegistered(image->ImageId())) {
continue;
}
// Ignore duplicate images.
if (common_image_ids.count(image->ImageId()) > 0) {
continue;
}
common_image_ids.insert(image->ImageId());
src.push_back(image->ProjectionCenter());
dst.push_back(locations[i]);
}
// Only compute the alignment if there are enough correspondences.
if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
return false;
}
SimilarityTransform3 transform;
if (!transform.Estimate<kEstimateScale>(src, dst)) {
return false;
}
Transform(transform);
if (tform != nullptr) {
*tform = transform;
}
return true;
}
template <bool kEstimateScale>
bool Reconstruction::AlignRobust(const std::vector<std::string>& image_names,
const std::vector<Eigen::Vector3d>& locations,
const int min_common_images,
const RANSACOptions& ransac_options,
SimilarityTransform3* tform) {
CHECK_GE(min_common_images, 3);
CHECK_EQ(image_names.size(), locations.size());
// Find out which images are contained in the reconstruction and get the
// positions of their camera centers.
std::unordered_set<image_t> common_image_ids;
std::vector<Eigen::Vector3d> src;
std::vector<Eigen::Vector3d> dst;
for (size_t i = 0; i < image_names.size(); ++i) {
const class Image* image = FindImageWithName(image_names[i]);
if (image == nullptr) {
continue;
}
if (!IsImageRegistered(image->ImageId())) {
continue;
}
// Ignore duplicate images.
if (common_image_ids.count(image->ImageId()) > 0) {
continue;
}
common_image_ids.insert(image->ImageId());
src.push_back(image->ProjectionCenter());
dst.push_back(locations[i]);
}
// Only compute the alignment if there are enough correspondences.
if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
return false;
}
LORANSAC<SimilarityTransformEstimator<3, kEstimateScale>,
SimilarityTransformEstimator<3, kEstimateScale>>
ransac(ransac_options);
const auto report = ransac.Estimate(src, dst);
if (report.support.num_inliers < static_cast<size_t>(min_common_images)) {
return false;
}
SimilarityTransform3 transform = SimilarityTransform3(report.model);
Transform(transform);
if (tform != nullptr) {
*tform = transform;
}
return true;
}
} // namespace colmap
#endif // COLMAP_SRC_BASE_RECONSTRUCTION_H_