ceres-solver and colmap

2b5a2b6 about 3 years ago

25.8 kB

	// Copyright (c) 2022, ETH Zurich and UNC Chapel Hill.
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	//
	// * Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	//
	// * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
	// its contributors may be used to endorse or promote products derived
	// from this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.
	//
	// Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)

	#ifndef COLMAP_SRC_BASE_RECONSTRUCTION_H_
	#define COLMAP_SRC_BASE_RECONSTRUCTION_H_

	#include <tuple>
	#include <unordered_map>
	#include <unordered_set>
	#include <vector>

	#include <Eigen/Core>

	#include "base/camera.h"
	#include "base/database.h"
	#include "base/image.h"
	#include "base/point2d.h"
	#include "base/point3d.h"
	#include "base/similarity_transform.h"
	#include "base/track.h"
	#include "estimators/similarity_transform.h"
	#include "optim/loransac.h"
	#include "util/alignment.h"
	#include "util/types.h"

	namespace colmap {

	struct PlyPoint;
	struct RANSACOptions;
	class DatabaseCache;
	class CorrespondenceGraph;

	// Reconstruction class holds all information about a single reconstructed
	// model. It is used by the mapping and bundle adjustment classes and can be
	// written to and read from disk.
	class Reconstruction {
	public:
	struct ImagePairStat {
	// The number of triangulated correspondences between two images.
	size_t num_tri_corrs = 0;
	// The number of total correspondences/matches between two images.
	size_t num_total_corrs = 0;
	};

	Reconstruction();

	// Get number of objects.
	inline size_t NumCameras() const;
	inline size_t NumImages() const;
	inline size_t NumRegImages() const;
	inline size_t NumPoints3D() const;
	inline size_t NumImagePairs() const;

	// Get const objects.
	inline const class Camera& Camera(const camera_t camera_id) const;
	inline const class Image& Image(const image_t image_id) const;
	inline const class Point3D& Point3D(const point3D_t point3D_id) const;
	inline const ImagePairStat& ImagePair(const image_pair_t pair_id) const;
	inline ImagePairStat& ImagePair(const image_t image_id1,
	const image_t image_id2);

	// Get mutable objects.
	inline class Camera& Camera(const camera_t camera_id);
	inline class Image& Image(const image_t image_id);
	inline class Point3D& Point3D(const point3D_t point3D_id);
	inline ImagePairStat& ImagePair(const image_pair_t pair_id);
	inline const ImagePairStat& ImagePair(const image_t image_id1,
	const image_t image_id2) const;

	// Get reference to all objects.
	inline const EIGEN_STL_UMAP(camera_t, class Camera) & Cameras() const;
	inline const EIGEN_STL_UMAP(image_t, class Image) & Images() const;
	inline const std::vector<image_t>& RegImageIds() const;
	inline const EIGEN_STL_UMAP(point3D_t, class Point3D) & Points3D() const;
	inline const std::unordered_map<image_pair_t, ImagePairStat>& ImagePairs()
	const;

	// Identifiers of all 3D points.
	std::unordered_set<point3D_t> Point3DIds() const;

	// Check whether specific object exists.
	inline bool ExistsCamera(const camera_t camera_id) const;
	inline bool ExistsImage(const image_t image_id) const;
	inline bool ExistsPoint3D(const point3D_t point3D_id) const;
	inline bool ExistsImagePair(const image_pair_t pair_id) const;

	// Load data from given `DatabaseCache`.
	void Load(const DatabaseCache& database_cache);

	// Setup all relevant data structures before reconstruction. Note the
	// correspondence graph object must live until `TearDown` is called.
	void SetUp(const CorrespondenceGraph* correspondence_graph);

	// Finalize the Reconstruction after the reconstruction has finished.
	//
	// Once a scene has been finalized, it cannot be used for reconstruction.
	//
	// This removes all not yet registered images and unused cameras, in order to
	// save memory.
	void TearDown();

	// Add new camera. There is only one camera per image, while multiple images
	// might be taken by the same camera.
	void AddCamera(class Camera camera);

	// Add new image.
	void AddImage(class Image image);

	// Add new 3D object, and return its unique ID.
	point3D_t AddPoint3D(
	const Eigen::Vector3d& xyz, Track track,
	const Eigen::Vector3ub& color = Eigen::Vector3ub::Zero());

	// Add observation to existing 3D point.
	void AddObservation(const point3D_t point3D_id, const TrackElement& track_el);

	// Merge two 3D points and return new identifier of new 3D point.
	// The location of the merged 3D point is a weighted average of the two
	// original 3D point's locations according to their track lengths.
	point3D_t MergePoints3D(const point3D_t point3D_id1,
	const point3D_t point3D_id2);

	// Delete a 3D point, and all its references in the observed images.
	void DeletePoint3D(const point3D_t point3D_id);

	// Delete one observation from an image and the corresponding 3D point.
	// Note that this deletes the entire 3D point, if the track has two elements
	// prior to calling this method.
	void DeleteObservation(const image_t image_id, const point2D_t point2D_idx);

	// Delete all 2D points of all images and all 3D points.
	void DeleteAllPoints2DAndPoints3D();

	// Register an existing image.
	void RegisterImage(const image_t image_id);

	// De-register an existing image, and all its references.
	void DeRegisterImage(const image_t image_id);

	// Check if image is registered.
	inline bool IsImageRegistered(const image_t image_id) const;

	// Normalize scene by scaling and translation to avoid degenerate
	// visualization after bundle adjustment and to improve numerical
	// stability of algorithms.
	//
	// Translates scene such that the mean of the camera centers or point
	// locations are at the origin of the coordinate system.
	//
	// Scales scene such that the minimum and maximum camera centers are at the
	// given `extent`, whereas `p0` and `p1` determine the minimum and
	// maximum percentiles of the camera centers considered.
	void Normalize(const double extent = 10.0, const double p0 = 0.1,
	const double p1 = 0.9, const bool use_images = true);

	// Compute the centroid of the 3D points
	Eigen::Vector3d ComputeCentroid(const double p0 = 0.1,
	const double p1 = 0.9) const;

	// Compute the bounding box corners of the 3D points
	std::pair<Eigen::Vector3d, Eigen::Vector3d> ComputeBoundingBox(
	const double p0 = 0.0, const double p1 = 1.0) const;

	// Apply the 3D similarity transformation to all images and points.
	void Transform(const SimilarityTransform3& tform);

	// Creates a cropped reconstruction using the input bounds as corner points
	// of the bounding box containing the included 3D points of the new
	// reconstruction. Only the cameras and images of the included points are
	// registered.
	Reconstruction Crop(
	const std::pair<Eigen::Vector3d, Eigen::Vector3d>& bbox) const;

	// Merge the given reconstruction into this reconstruction by registering the
	// images registered in the given but not in this reconstruction and by
	// merging the two clouds and their tracks. The coordinate frames of the two
	// reconstructions are aligned using the projection centers of common
	// registered images. Return true if the two reconstructions could be merged.
	bool Merge(const Reconstruction& reconstruction,
	const double max_reproj_error);

	// Align the given reconstruction with a set of pre-defined camera positions.
	// Assuming that locations[i] gives the 3D coordinates of the center
	// of projection of the image with name image_names[i].
	template <bool kEstimateScale = true>
	bool Align(const std::vector<std::string>& image_names,
	const std::vector<Eigen::Vector3d>& locations,
	const int min_common_images,
	SimilarityTransform3* tform = nullptr);

	// Robust alignment using RANSAC.
	template <bool kEstimateScale = true>
	bool AlignRobust(const std::vector<std::string>& image_names,
	const std::vector<Eigen::Vector3d>& locations,
	const int min_common_images,
	const RANSACOptions& ransac_options,
	SimilarityTransform3* tform = nullptr);

	// Find specific image by name. Note that this uses linear search.
	const class Image* FindImageWithName(const std::string& name) const;

	// Find images that are both present in this and the given reconstruction.
	std::vector<image_t> FindCommonRegImageIds(
	const Reconstruction& reconstruction) const;

	// Update the image identifiers to match the ones in the database by matching
	// the names of the images.
	void TranscribeImageIdsToDatabase(const Database& database);

	// Filter 3D points with large reprojection error, negative depth, or
	// insufficient triangulation angle.
	//
	// @param max_reproj_error The maximum reprojection error.
	// @param min_tri_angle The minimum triangulation angle.
	// @param point3D_ids The points to be filtered.
	//
	// @return The number of filtered observations.
	size_t FilterPoints3D(const double max_reproj_error,
	const double min_tri_angle,
	const std::unordered_set<point3D_t>& point3D_ids);
	size_t FilterPoints3DInImages(const double max_reproj_error,
	const double min_tri_angle,
	const std::unordered_set<image_t>& image_ids);
	size_t FilterAllPoints3D(const double max_reproj_error,
	const double min_tri_angle);

	// Filter observations that have negative depth.
	//
	// @return The number of filtered observations.
	size_t FilterObservationsWithNegativeDepth();

	// Filter images without observations or bogus camera parameters.
	//
	// @return The identifiers of the filtered images.
	std::vector<image_t> FilterImages(const double min_focal_length_ratio,
	const double max_focal_length_ratio,
	const double max_extra_param);

	// Compute statistics for scene.
	size_t ComputeNumObservations() const;
	double ComputeMeanTrackLength() const;
	double ComputeMeanObservationsPerRegImage() const;
	double ComputeMeanReprojectionError() const;

	// Read data from text or binary file. Prefer binary data if it exists.
	void Read(const std::string& path);
	void Write(const std::string& path) const;

	// Read data from binary/text file.
	void ReadText(const std::string& path);
	void ReadBinary(const std::string& path);

	// Write data from binary/text file.
	void WriteText(const std::string& path) const;
	void WriteBinary(const std::string& path) const;

	// Convert 3D points in reconstruction to PLY point cloud.
	std::vector<PlyPoint> ConvertToPLY() const;

	// Import from other data formats. Note that these import functions are
	// only intended for visualization of data and usable for reconstruction.
	void ImportPLY(const std::string& path);
	void ImportPLY(const std::vector<PlyPoint>& ply_points);

	// Export to other data formats.

	// Exports in NVM format http://ccwu.me/vsfm/doc.html#nvm. Only supports
	// SIMPLE_RADIAL camera model when exporting distortion parameters. When
	// skip_distortion == true it supports all camera models with the caveat that
	// it's using the mean focal length which will be inaccurate for camera models
	// with two focal lengths and distortion.
	bool ExportNVM(const std::string& path, bool skip_distortion = false) const;

	// Exports in CAM format which is a simple text file that contains pose
	// information and camera intrinsics for each image and exports one file per
	// image; it does not include information on the 3D points. The format is as
	// follows (2 lines of text with space separated numbers):
	// <Tvec; 3 values> <Rotation matrix in row-major format; 9 values>
	// <focal_length> <k1> <k2> 1.0 <principal point X> <principal point Y>
	// Note that focal length is relative to the image max(width, height),
	// and principal points x and y are relative to width and height respectively.
	//
	// Only supports SIMPLE_RADIAL and RADIAL camera models when exporting
	// distortion parameters. When skip_distortion == true it supports all camera
	// models with the caveat that it's using the mean focal length which will be
	// inaccurate for camera models with two focal lengths and distortion.
	bool ExportCam(const std::string& path, bool skip_distortion = false) const;

	// Exports in Recon3D format which consists of three text files with the
	// following format and content:
	// 1) imagemap_0.txt: a list of image numeric IDs with one entry per line.
	// 2) urd-images.txt: A list of images with one entry per line as:
	// <image file name> <width> <height>
	// 3) synth_0.out: Contains information for image poses, camera intrinsics,
	// and 3D points as:
	// <N; num images> <M; num points>
	// <N lines of image entries>
	// <M lines of point entries>
	//
	// Each image entry consists of 5 lines as:
	// <focal length> <k1> <k2>
	// <Rotation matrix; 3x3 array>
	// <Tvec; 3 values>
	// Note that the focal length is scaled by 1 / max(width, height)
	//
	// Each point entry consists of 3 lines as:
	// <point x, y, z coordinates>
	// <point RGB color>
	// <K; num track elements> <Track Element 1> ... <Track Element K>
	//
	// Each track elemenet is a sequence of 5 values as:
	// <image ID> <2D point ID> -1.0 <X> <Y>
	// Note that the 2D point coordinates are centered around the principal
	// point and scaled by 1 / max(width, height).
	//
	// When skip_distortion == true it supports all camera models with the
	// caveat that it's using the mean focal length which will be inaccurate
	// for camera models with two focal lengths and distortion.
	bool ExportRecon3D(const std::string& path,
	bool skip_distortion = false) const;

	// Exports in Bundler format https://www.cs.cornell.edu/~snavely/bundler/.
	// Supports SIMPLE_PINHOLE, PINHOLE, SIMPLE_RADIAL and RADIAL camera models
	// when exporting distortion parameters. When skip_distortion == true it
	// supports all camera models with the caveat that it's using the mean focal
	// length which will be inaccurate for camera models with two focal lengths
	// and distortion.
	bool ExportBundler(const std::string& path, const std::string& list_path,
	bool skip_distortion = false) const;

	// Exports 3D points only in PLY format.
	void ExportPLY(const std::string& path) const;

	// Exports in VRML format https://en.wikipedia.org/wiki/VRML.
	void ExportVRML(const std::string& images_path,
	const std::string& points3D_path, const double image_scale,
	const Eigen::Vector3d& image_rgb) const;

	// Extract colors for 3D points of given image. Colors will be extracted
	// only for 3D points which are completely black.
	//
	// @param image_id Identifier of the image for which to extract colors.
	// @param path Absolute or relative path to root folder of image.
	// The image path is determined by concatenating the
	// root path and the name of the image.
	//
	// @return True if image could be read at given path.
	bool ExtractColorsForImage(const image_t image_id, const std::string& path);

	// Extract colors for all 3D points by computing the mean color of all images.
	//
	// @param path Absolute or relative path to root folder of image.
	// The image path is determined by concatenating the
	// root path and the name of the image.
	void ExtractColorsForAllImages(const std::string& path);

	// Create all image sub-directories in the given path.
	void CreateImageDirs(const std::string& path) const;

	private:
	size_t FilterPoints3DWithSmallTriangulationAngle(
	const double min_tri_angle,
	const std::unordered_set<point3D_t>& point3D_ids);
	size_t FilterPoints3DWithLargeReprojectionError(
	const double max_reproj_error,
	const std::unordered_set<point3D_t>& point3D_ids);

	std::tuple<Eigen::Vector3d, Eigen::Vector3d, Eigen::Vector3d>
	ComputeBoundsAndCentroid(const double p0, const double p1,
	const bool use_images) const;

	void ReadCamerasText(const std::string& path);
	void ReadImagesText(const std::string& path);
	void ReadPoints3DText(const std::string& path);
	void ReadCamerasBinary(const std::string& path);
	void ReadImagesBinary(const std::string& path);
	void ReadPoints3DBinary(const std::string& path);

	void WriteCamerasText(const std::string& path) const;
	void WriteImagesText(const std::string& path) const;
	void WritePoints3DText(const std::string& path) const;
	void WriteCamerasBinary(const std::string& path) const;
	void WriteImagesBinary(const std::string& path) const;
	void WritePoints3DBinary(const std::string& path) const;

	void SetObservationAsTriangulated(const image_t image_id,
	const point2D_t point2D_idx,
	const bool is_continued_point3D);
	void ResetTriObservations(const image_t image_id, const point2D_t point2D_idx,
	const bool is_deleted_point3D);

	const CorrespondenceGraph* correspondence_graph_;

	EIGEN_STL_UMAP(camera_t, class Camera) cameras_;
	EIGEN_STL_UMAP(image_t, class Image) images_;
	EIGEN_STL_UMAP(point3D_t, class Point3D) points3D_;

	std::unordered_map<image_pair_t, ImagePairStat> image_pair_stats_;

	// { image_id, ... } where `images_.at(image_id).registered == true`.
	std::vector<image_t> reg_image_ids_;

	// Total number of added 3D points, used to generate unique identifiers.
	point3D_t num_added_points3D_;
	};

	////////////////////////////////////////////////////////////////////////////////
	// Implementation
	////////////////////////////////////////////////////////////////////////////////

	size_t Reconstruction::NumCameras() const { return cameras_.size(); }

	size_t Reconstruction::NumImages() const { return images_.size(); }

	size_t Reconstruction::NumRegImages() const { return reg_image_ids_.size(); }

	size_t Reconstruction::NumPoints3D() const { return points3D_.size(); }

	size_t Reconstruction::NumImagePairs() const {
	return image_pair_stats_.size();
	}

	const class Camera& Reconstruction::Camera(const camera_t camera_id) const {
	return cameras_.at(camera_id);
	}

	const class Image& Reconstruction::Image(const image_t image_id) const {
	return images_.at(image_id);
	}

	const class Point3D& Reconstruction::Point3D(const point3D_t point3D_id) const {
	return points3D_.at(point3D_id);
	}

	const Reconstruction::ImagePairStat& Reconstruction::ImagePair(
	const image_pair_t pair_id) const {
	return image_pair_stats_.at(pair_id);
	}

	const Reconstruction::ImagePairStat& Reconstruction::ImagePair(
	const image_t image_id1, const image_t image_id2) const {
	const auto pair_id = Database::ImagePairToPairId(image_id1, image_id2);
	return image_pair_stats_.at(pair_id);
	}

	class Camera& Reconstruction::Camera(const camera_t camera_id) {
	return cameras_.at(camera_id);
	}

	class Image& Reconstruction::Image(const image_t image_id) {
	return images_.at(image_id);
	}

	class Point3D& Reconstruction::Point3D(const point3D_t point3D_id) {
	return points3D_.at(point3D_id);
	}

	Reconstruction::ImagePairStat& Reconstruction::ImagePair(
	const image_pair_t pair_id) {
	return image_pair_stats_.at(pair_id);
	}

	Reconstruction::ImagePairStat& Reconstruction::ImagePair(
	const image_t image_id1, const image_t image_id2) {
	const auto pair_id = Database::ImagePairToPairId(image_id1, image_id2);
	return image_pair_stats_.at(pair_id);
	}

	const EIGEN_STL_UMAP(camera_t, Camera) & Reconstruction::Cameras() const {
	return cameras_;
	}

	const EIGEN_STL_UMAP(image_t, class Image) & Reconstruction::Images() const {
	return images_;
	}

	const std::vector<image_t>& Reconstruction::RegImageIds() const {
	return reg_image_ids_;
	}

	const EIGEN_STL_UMAP(point3D_t, Point3D) & Reconstruction::Points3D() const {
	return points3D_;
	}

	const std::unordered_map<image_pair_t, Reconstruction::ImagePairStat>&
	Reconstruction::ImagePairs() const {
	return image_pair_stats_;
	}

	bool Reconstruction::ExistsCamera(const camera_t camera_id) const {
	return cameras_.find(camera_id) != cameras_.end();
	}

	bool Reconstruction::ExistsImage(const image_t image_id) const {
	return images_.find(image_id) != images_.end();
	}

	bool Reconstruction::ExistsPoint3D(const point3D_t point3D_id) const {
	return points3D_.find(point3D_id) != points3D_.end();
	}

	bool Reconstruction::ExistsImagePair(const image_pair_t pair_id) const {
	return image_pair_stats_.find(pair_id) != image_pair_stats_.end();
	}

	bool Reconstruction::IsImageRegistered(const image_t image_id) const {
	return Image(image_id).IsRegistered();
	}

	template <bool kEstimateScale>
	bool Reconstruction::Align(const std::vector<std::string>& image_names,
	const std::vector<Eigen::Vector3d>& locations,
	const int min_common_images,
	SimilarityTransform3* tform) {
	CHECK_GE(min_common_images, 3);
	CHECK_EQ(image_names.size(), locations.size());

	// Find out which images are contained in the reconstruction and get the
	// positions of their camera centers.
	std::unordered_set<image_t> common_image_ids;
	std::vector<Eigen::Vector3d> src;
	std::vector<Eigen::Vector3d> dst;
	for (size_t i = 0; i < image_names.size(); ++i) {
	const class Image* image = FindImageWithName(image_names[i]);
	if (image == nullptr) {
	continue;
	}

	if (!IsImageRegistered(image->ImageId())) {
	continue;
	}

	// Ignore duplicate images.
	if (common_image_ids.count(image->ImageId()) > 0) {
	continue;
	}

	common_image_ids.insert(image->ImageId());
	src.push_back(image->ProjectionCenter());
	dst.push_back(locations[i]);
	}

	// Only compute the alignment if there are enough correspondences.
	if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
	return false;
	}

	SimilarityTransform3 transform;
	if (!transform.Estimate<kEstimateScale>(src, dst)) {
	return false;
	}

	Transform(transform);

	if (tform != nullptr) {
	*tform = transform;
	}

	return true;
	}

	template <bool kEstimateScale>
	bool Reconstruction::AlignRobust(const std::vector<std::string>& image_names,
	const std::vector<Eigen::Vector3d>& locations,
	const int min_common_images,
	const RANSACOptions& ransac_options,
	SimilarityTransform3* tform) {
	CHECK_GE(min_common_images, 3);
	CHECK_EQ(image_names.size(), locations.size());

	// Find out which images are contained in the reconstruction and get the
	// positions of their camera centers.
	std::unordered_set<image_t> common_image_ids;
	std::vector<Eigen::Vector3d> src;
	std::vector<Eigen::Vector3d> dst;
	for (size_t i = 0; i < image_names.size(); ++i) {
	const class Image* image = FindImageWithName(image_names[i]);
	if (image == nullptr) {
	continue;
	}

	if (!IsImageRegistered(image->ImageId())) {
	continue;
	}

	// Ignore duplicate images.
	if (common_image_ids.count(image->ImageId()) > 0) {
	continue;
	}

	common_image_ids.insert(image->ImageId());
	src.push_back(image->ProjectionCenter());
	dst.push_back(locations[i]);
	}

	// Only compute the alignment if there are enough correspondences.
	if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
	return false;
	}

	LORANSAC<SimilarityTransformEstimator<3, kEstimateScale>,
	SimilarityTransformEstimator<3, kEstimateScale>>
	ransac(ransac_options);

	const auto report = ransac.Estimate(src, dst);

	if (report.support.num_inliers < static_cast<size_t>(min_common_images)) {
	return false;
	}

	SimilarityTransform3 transform = SimilarityTransform3(report.model);
	Transform(transform);

	if (tform != nullptr) {
	*tform = transform;
	}

	return true;
	}

	} // namespace colmap

	#endif // COLMAP_SRC_BASE_RECONSTRUCTION_H_