| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include <vector> |
| | #include <iostream> |
| | #include <utility> |
| | #include <algorithm> |
| | #include <fstream> |
| | #include <iomanip> |
| | using std::vector; |
| | using std::cout; |
| | using std::pair; |
| | using std::ofstream; |
| |
|
| | #include <stdlib.h> |
| | #include <math.h> |
| | #include <float.h> |
| | #include "pba.h" |
| | #include "SparseBundleCU.h" |
| |
|
| | #include "ProgramCU.h" |
| |
|
| | using namespace pba::ProgramCU; |
| |
|
| | #ifdef _WIN32 |
| | #define finite _finite |
| | #endif |
| |
|
| | namespace pba { |
| |
|
| | typedef float float_t; |
| | |
| |
|
// Compares two host-side buffers element by element and prints every
// mismatch. The macro uses two free names that the expansion site must
// provide: the run index `i` and the mismatch counter `different`
// (DEBUG_FUNCN below declares both).
#define CHECK_VEC(v1, v2)                                               \
  for (size_t j = 0; j < (v1).size(); ++j) {                            \
    if ((v1)[j] != (v2)[j]) {                                           \
      ++different;                                                      \
      std::cout << i << ' ' << j << ' ' << (v1)[j] << ' ' << (v2)[j]    \
                << '\n';                                                \
    }                                                                   \
  }

// Repeatability check: when __debug_pba is set and `v` is valid, runs
// `func input` N times, copies `v` back to the host after each run and
// compares every later run against the first one. A line is printed for each
// run that differs.
#define DEBUG_FUNCN(v, func, input, N)                                  \
  if (__debug_pba && v.IsValid()) {                                     \
    vector<float> buf(v.GetLength()), buf_(v.GetLength());              \
    for (int i = 0; i < N; ++i) {                                       \
      int different = 0;                                                \
      func input;                                                       \
      ProgramCU::FinishWorkCUDA();                                      \
      if (i == 0) {                                                     \
        v.CopyToHost(&buf[0]);                                          \
      } else {                                                          \
        v.CopyToHost(&buf_[0]);                                         \
        CHECK_VEC(buf, buf_);                                           \
      }                                                                 \
      if (different)                                                    \
        std::cout << #func << " : " << i << " : " << different << '\n'; \
    }                                                                   \
  }

// Two-run variant of DEBUG_FUNCN.
#define DEBUG_FUNC(v, func, input) DEBUG_FUNCN(v, func, input, 2)
| |
|
| | SparseBundleCU::SparseBundleCU(int device) |
| | : ParallelBA(PBA_INVALID_DEVICE), |
| | _num_camera(0), |
| | _num_point(0), |
| | _num_imgpt(0), |
| | _num_imgpt_q(0), |
| | _camera_data(NULL), |
| | _point_data(NULL), |
| | _imgpt_data(NULL), |
| | _camera_idx(NULL), |
| | _point_idx(NULL), |
| | _projection_sse(0) { |
| | __selected_device = device; |
| | } |
| |
|
| | size_t SparseBundleCU::GetMemCapacity() { |
| | if (__selected_device != __current_device) SetCudaDevice(__selected_device); |
| | size_t sz = ProgramCU::GetCudaMemoryCap(); |
| | if (sz < 1024) std::cout << "ERROR: CUDA is unlikely to be supported!\n"; |
| | return sz < 1024 ? 0 : sz; |
| | } |
| |
|
| | void SparseBundleCU::SetCameraData(size_t ncam, CameraT* cams) { |
| | if (sizeof(CameraT) != 16 * sizeof(float)) exit(0); |
| | _num_camera = (int)ncam; |
| | _camera_data = cams; |
| | _focal_mask = NULL; |
| | } |
| |
|
| | void SparseBundleCU::SetFocalMask(const int* fmask, float weight) { |
| | _focal_mask = fmask; |
| | _weight_q = weight; |
| | } |
| |
|
| | void SparseBundleCU::SetPointData(size_t npoint, Point3D* pts) { |
| | _num_point = (int)npoint; |
| | _point_data = (float*)pts; |
| | } |
| |
|
| | void SparseBundleCU::SetProjection(size_t nproj, const Point2D* imgpts, |
| | const int* point_idx, const int* cam_idx) { |
| | _num_imgpt = (int)nproj; |
| | _imgpt_data = (float*)imgpts; |
| | _camera_idx = cam_idx; |
| | _point_idx = point_idx; |
| | _imgpt_datax.resize(0); |
| | } |
| |
|
| | float SparseBundleCU::GetMeanSquaredError() { |
| | return float(_projection_sse / |
| | (_num_imgpt * __focal_scaling * __focal_scaling)); |
| | } |
| |
|
| | void SparseBundleCU::BundleAdjustment() { |
| | if (ValidateInputData() != STATUS_SUCCESS) return; |
| |
|
| | |
| |
|
| | |
| | TimerBA timer(this, TIMER_OVERALL); |
| |
|
| | NormalizeData(); |
| | if (InitializeBundle() != STATUS_SUCCESS) { |
| | |
| | } else if (__profile_pba) { |
| | |
| | RunProfileSteps(); |
| | } else { |
| | |
| | AdjustBundleAdjsutmentMode(); |
| | NonlinearOptimizeLM(); |
| | TransferDataToHost(); |
| | } |
| | DenormalizeData(); |
| | } |
| |
|
| | int SparseBundleCU::RunBundleAdjustment() { |
| | if (__warmup_device) WarmupDevice(); |
| | ResetBundleStatistics(); |
| | BundleAdjustment(); |
| | if (__num_lm_success > 0) |
| | SaveBundleStatistics(_num_camera, _num_point, _num_imgpt); |
| | if (__num_lm_success > 0) PrintBundleStatistics(); |
| | ResetTemporarySetting(); |
| | return __num_lm_success; |
| | } |
| |
|
| | bool SparseBundleCU::InitializeBundleGPU() { |
| | bool previous_allocated = __memory_usage > 0; |
| |
|
| | bool success = TransferDataToGPU() && InitializeStorageForCG(); |
| | if (!success && previous_allocated) { |
| | if (__verbose_level) std::cout << "WARNING: try clean allocation\n"; |
| | ClearPreviousError(); |
| | ReleaseAllocatedData(); |
| | success = TransferDataToGPU() && InitializeStorageForCG(); |
| | } |
| |
|
| | if (!success && __jc_store_original) { |
| | if (__verbose_level) std::cout << "WARNING: try not storing original JC\n"; |
| | __jc_store_original = false; |
| | ClearPreviousError(); |
| | ReleaseAllocatedData(); |
| | success = TransferDataToGPU() && InitializeStorageForCG(); |
| | } |
| | if (!success && __jc_store_transpose) { |
| | if (__verbose_level) std::cout << "WARNING: try not storing transpose JC\n"; |
| | __jc_store_transpose = false; |
| | ClearPreviousError(); |
| | ReleaseAllocatedData(); |
| | success = TransferDataToGPU() && InitializeStorageForCG(); |
| | } |
| | if (!success && !__no_jacobian_store) { |
| | if (__verbose_level) std::cout << "WARNING: switch to memory saving mode\n"; |
| | __no_jacobian_store = true; |
| | ClearPreviousError(); |
| | ReleaseAllocatedData(); |
| | success = TransferDataToGPU() && InitializeStorageForCG(); |
| | } |
| | return success; |
| | } |
| |
|
| | int SparseBundleCU::ValidateInputData() { |
| | if (_camera_data == NULL) return STATUS_CAMERA_MISSING; |
| | if (_point_data == NULL) return STATUS_POINT_MISSING; |
| | if (_imgpt_data == NULL) return STATUS_MEASURMENT_MISSING; |
| | if (_camera_idx == NULL || _point_idx == NULL) |
| | return STATUS_PROJECTION_MISSING; |
| | return STATUS_SUCCESS; |
| | } |
| |
|
| | void SparseBundleCU::WarmupDevice() { |
| | std::cout << "Warm up device with storage allocation...\n"; |
| | if (__selected_device != __current_device) SetCudaDevice(__selected_device); |
| | CheckRequiredMemX(); |
| | InitializeBundleGPU(); |
| | } |
| |
|
| | int SparseBundleCU::InitializeBundle() { |
| | |
| | TimerBA timer(this, TIMER_GPU_ALLOCATION); |
| | if (__selected_device != __current_device) SetCudaDevice(__selected_device); |
| | CheckRequiredMemX(); |
| | ReserveStorageAuto(); |
| | if (!InitializeBundleGPU()) return STATUS_ALLOCATION_FAIL; |
| | return STATUS_SUCCESS; |
| | } |
| |
|
| | int SparseBundleCU::GetParameterLength() { |
| | return _num_camera * 8 + 4 * _num_point; |
| | } |
| |
|
| | bool SparseBundleCU::CheckRequiredMemX() { |
| | if (CheckRequiredMem(0)) return true; |
| | if (__jc_store_original) { |
| | if (__verbose_level) std::cout << "NOTE: not storing original JC\n"; |
| | __jc_store_original = false; |
| | if (CheckRequiredMem(1)) return true; |
| | } |
| | if (__jc_store_transpose) { |
| | if (__verbose_level) std::cout << "NOTE: not storing camera Jacobian\n"; |
| | __jc_store_transpose = false; |
| | if (CheckRequiredMem(1)) return true; |
| | } |
| | if (!__no_jacobian_store) { |
| | if (__verbose_level) std::cout << "NOTE: not storing any Jacobian\n"; |
| | __no_jacobian_store = true; |
| | if (CheckRequiredMem(1)) return true; |
| | } |
| | return false; |
| | } |
| |
|
| | bool SparseBundleCU::CheckRequiredMem(int fresh) { |
| | int m = _num_camera, n = _num_point, k = _num_imgpt; |
| | #ifdef PBA_CUDA_ALLOCATE_MORE |
| | if (!fresh) { |
| | int m0 = _cuCameraData.GetReservedWidth(); |
| | m = std::max(m, m0); |
| | int n0 = _cuPointData.GetReservedWidth(); |
| | n = std::max(n, n0); |
| | int k0 = _cuMeasurements.GetReservedWidth(); |
| | k = std::max(k, k0); |
| | } |
| | #endif |
| |
|
| | int p = 8 * m + 4 * n, q = _num_imgpt_q; |
| | size_t szn, total = GetCudaMemoryCap(); |
| | size_t sz0 = 800 * 600 * 2 * 4 * sizeof(float); |
| | size_t szq = q > 0 ? (sizeof(float) * (q + m) * 4) : 0; |
| | size_t sz = sizeof(float) * (258 + 9 * n + 33 * m + 7 * k) + sz0; |
| |
|
| | |
| | sz += p * 6 * sizeof(float); |
| | sz += ((__use_radial_distortion ? 64 : 56) * m + 12 * n) * sizeof(float); |
| | sz += (2 * (k + q) * sizeof(float)); |
| | if (sz > total) return false; |
| |
|
| | |
| | szn = (__no_jacobian_store ? 0 : (sizeof(float) * 8 * k)); |
| | if (sz + szn > total) |
| | __no_jacobian_store = false; |
| | else |
| | sz += szn; |
| | |
| | szn = ((!__no_jacobian_store && __jc_store_transpose) ? 16 * k * sizeof(float) |
| | : 0); |
| | if (sz + szn > total) |
| | __jc_store_transpose = false; |
| | else |
| | sz += szn; |
| | |
| | szn = ((!__no_jacobian_store && __jc_store_original) ? 16 * k * sizeof(float) |
| | : 0); |
| | if (sz + szn > total) |
| | __jc_store_original = false; |
| | else |
| | sz += szn; |
| | |
| | szn = ((!__no_jacobian_store && __jc_store_transpose && !__jc_store_original) |
| | ? k * sizeof(int) |
| | : 0); |
| | if (sz + szn > total) { |
| | __jc_store_transpose = false; |
| | sz -= (16 * k * sizeof(float)); |
| | } else |
| | sz += szn; |
| |
|
| | return sz <= total; |
| | } |
| |
|
| | void SparseBundleCU::ReserveStorage(size_t ncam, size_t npt, size_t nproj) { |
| | if (ncam <= 1 || npt <= 1 || nproj <= 1) { |
| | ReleaseAllocatedData(); |
| | |
| | __jc_store_transpose = true; |
| | __jc_store_original = true; |
| | __no_jacobian_store = false; |
| | } else { |
| | const int* camidx = _camera_idx; |
| | const int* ptidx = _point_idx; |
| | int ncam_ = _num_camera; |
| | int npt_ = _num_point; |
| | int nproj_ = _num_imgpt; |
| |
|
| | #ifdef PBA_CUDA_ALLOCATE_MORE |
| | size_t ncam_reserved = _cuCameraData.GetReservedWidth(); |
| | size_t npt_reserved = _cuPointData.GetReservedWidth(); |
| | size_t nproj_reserved = _cuMeasurements.GetReservedWidth(); |
| | ncam = std::max(ncam, ncam_reserved); |
| | npt = std::max(npt, npt_reserved); |
| | nproj = std::max(nproj, nproj_reserved); |
| | #endif |
| |
|
| | _camera_idx = NULL; |
| | _point_idx = NULL; |
| | _num_camera = (int)ncam; |
| | _num_point = (int)npt; |
| | _num_imgpt = (int)nproj; |
| |
|
| | if (__verbose_level) |
| | std::cout << "Reserving storage for ncam = " << ncam << "; npt = " << npt |
| | << "; nproj = " << nproj << '\n'; |
| | InitializeBundleGPU(); |
| |
|
| | _num_camera = ncam_; |
| | _num_point = npt_; |
| | _num_imgpt = nproj_; |
| | _camera_idx = camidx; |
| | _point_idx = ptidx; |
| | } |
| | } |
| |
|
// Rounds `sz` up to the next power of two (0 and 1 both map to 1).
//
// Values above the largest power of two representable in size_t cannot be
// rounded up; they are returned unchanged. Without this guard the shift
// wraps to 0 and the loop never terminates.
static size_t upgrade_dimension(size_t sz) {
  const size_t top_bit = ~(~(size_t)0 >> 1);
  if (sz > top_bit) return sz;

  size_t x = 1;
  while (x < sz) x <<= 1;
  return x;
}
| |
|
| | void SparseBundleCU::ReserveStorageAuto() { |
| | if (_cuCameraData.data() == NULL || _cuPointData.data() == NULL || |
| | _cuMeasurements.data() == NULL) |
| | return; |
| | ReserveStorage(upgrade_dimension(_num_camera), upgrade_dimension(_num_point), |
| | upgrade_dimension(_num_imgpt)); |
| | } |
| |
|
// Allocation helper macros. ASSERT_ALLOCATION and CHECK_ALLOCATION rely on
// two locals of the expanding function, referenced by name:
//   bool   success   - running allocation status
//   size_t total_sz  - running total of allocated bytes
// ASSERT_ALLOCATION additionally does `return false` from that function.

// With __verbose_allocation set, prints the size of NAME in KB or MB.
// Buffers of 1024 bytes or less are not reported.
#define REPORT_ALLOCATION(NAME)                                  \
  if (__verbose_allocation && NAME.GetDataSize() > 1024)         \
    std::cout << (NAME.GetDataSize() > 1024 * 1024               \
                      ? NAME.GetDataSize() / 1024 / 1024         \
                      : NAME.GetDataSize() / 1024)               \
              << (NAME.GetDataSize() > 1024 * 1024 ? "MB" : "KB") \
              << "\t allocated for " #NAME "\n";

// Mandatory buffer: on failure, print a warning and return false from the
// enclosing function; on success, account for and report the buffer.
#define ASSERT_ALLOCATION(NAME)                                          \
  if (!success) {                                                        \
    std::cerr << "WARNING: failed to allocate "                          \
              << (__verbose_allocation ? #NAME "; size = " : "")         \
              << (total_sz / 1024 / 1024) << "MB + "                     \
              << (NAME.GetRequiredSize() / 1024 / 1024) << "MB\n";       \
    return false;                                                        \
  } else {                                                               \
    total_sz += NAME.GetDataSize();                                      \
    REPORT_ALLOCATION(NAME);                                             \
  }

// Optional buffer: a failed allocation only clears the error and warns.
#define CHECK_ALLOCATION(NAME)                                           \
  if (NAME.GetDataSize() == 0 && NAME.GetRequiredSize() > 0) {           \
    ClearPreviousError();                                                \
    std::cerr << "WARNING: unable to allocate " #NAME ": "               \
              << (NAME.GetRequiredSize() / 1024 / 1024) << "MB\n";       \
  } else {                                                               \
    total_sz += NAME.GetDataSize();                                      \
    REPORT_ALLOCATION(NAME);                                             \
  }

// Allocates NAME as a num x 1 texture with `channels` channels; failure
// aborts the enclosing function through ASSERT_ALLOCATION.
#define ALLOCATE_REQUIRED_DATA(NAME, num, channels)  \
  {                                                  \
    success &= NAME.InitTexture(num, 1, channels);   \
    ASSERT_ALLOCATION(NAME);                         \
  }

// Allocates NAME only when `option` is set and writes the outcome back into
// `option`. When the option is off, InitTexture(0, 0, 0) is called instead.
#define ALLOCATE_OPTIONAL_DATA(NAME, num, channels, option)  \
  if (option) {                                              \
    option = NAME.InitTexture(num, 1, channels);             \
    CHECK_ALLOCATION(NAME);                                  \
  } else {                                                   \
    NAME.InitTexture(0, 0, 0);                               \
  }
| |
|
| | bool SparseBundleCU::TransferDataToGPU() { |
| | |
| | bool success = true; |
| | size_t total_sz = 0; |
| |
|
| | |
| | vector<int> qmap, qlist; |
| | vector<float> qmapw, qlistw; |
| | ProcessIndexCameraQ(qmap, qlist); |
| |
|
| | |
| | ALLOCATE_REQUIRED_DATA(_cuBufferData, 256, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuPointData, _num_point, 4); |
| | ALLOCATE_REQUIRED_DATA(_cuCameraData, _num_camera, 16); |
| | ALLOCATE_REQUIRED_DATA(_cuCameraDataEX, _num_camera, 16); |
| |
|
| | |
| | ALLOCATE_REQUIRED_DATA(_cuCameraMeasurementMap, _num_camera + 1, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuCameraMeasurementList, _num_imgpt, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuPointMeasurementMap, _num_point + 1, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuProjectionMap, _num_imgpt, 2); |
| | ALLOCATE_REQUIRED_DATA(_cuImageProj, _num_imgpt + _num_imgpt_q, 2); |
| | ALLOCATE_REQUIRED_DATA(_cuPointDataEX, _num_point, 4); |
| | ALLOCATE_REQUIRED_DATA(_cuMeasurements, _num_imgpt, 2); |
| |
|
| | |
| | ALLOCATE_REQUIRED_DATA(_cuCameraQMap, _num_imgpt_q, 2); |
| | ALLOCATE_REQUIRED_DATA(_cuCameraQMapW, _num_imgpt_q, 2); |
| | ALLOCATE_REQUIRED_DATA(_cuCameraQList, (_num_imgpt_q > 0 ? _num_camera : 0), |
| | 2); |
| | ALLOCATE_REQUIRED_DATA(_cuCameraQListW, (_num_imgpt_q > 0 ? _num_camera : 0), |
| | 2); |
| |
|
| | if (__no_jacobian_store) { |
| | _cuJacobianCamera.ReleaseData(); |
| | _cuJacobianCameraT.ReleaseData(); |
| | _cuJacobianPoint.ReleaseData(); |
| | _cuCameraMeasurementListT.ReleaseData(); |
| | } else { |
| | ALLOCATE_REQUIRED_DATA(_cuJacobianPoint, _num_imgpt * 2, 4); |
| | ALLOCATE_OPTIONAL_DATA(_cuJacobianCameraT, _num_imgpt * 2, 8, |
| | __jc_store_transpose); |
| | ALLOCATE_OPTIONAL_DATA(_cuJacobianCamera, _num_imgpt * 2, 8, |
| | __jc_store_original); |
| |
|
| | if ((!__jc_store_original || __profile_pba) && __jc_store_transpose) { |
| | ALLOCATE_OPTIONAL_DATA(_cuCameraMeasurementListT, _num_imgpt, 1, |
| | __jc_store_transpose); |
| | if (!__jc_store_transpose) _cuJacobianCameraT.ReleaseData(); |
| | } else { |
| | _cuCameraMeasurementListT.ReleaseData(); |
| | } |
| | } |
| |
|
| | |
| | if (_camera_idx && _point_idx) { |
| | |
| | BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); |
| | |
| | vector<int> cpi(_num_camera + 1), cpidx(_num_imgpt); |
| | vector<int> cpnum(_num_camera, 0); |
| | cpi[0] = 0; |
| | for (int i = 0; i < _num_imgpt; ++i) cpnum[_camera_idx[i]]++; |
| | for (int i = 1; i <= _num_camera; ++i) cpi[i] = cpi[i - 1] + cpnum[i - 1]; |
| | vector<int> cptidx = cpi; |
| | for (int i = 0; i < _num_imgpt; ++i) cpidx[cptidx[_camera_idx[i]]++] = i; |
| | if (_num_imgpt_q > 0) ProcessWeightCameraQ(cpnum, qmap, qmapw, qlistw); |
| | BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); |
| |
|
| | |
| | BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); |
| | _cuMeasurements.CopyFromHost(_imgpt_datax.size() > 0 ? &_imgpt_datax[0] |
| | : _imgpt_data); |
| | _cuCameraData.CopyFromHost(_camera_data); |
| | _cuPointData.CopyFromHost(_point_data); |
| | _cuCameraMeasurementMap.CopyFromHost(&cpi[0]); |
| | _cuCameraMeasurementList.CopyFromHost(&cpidx[0]); |
| | if (_cuCameraMeasurementListT.IsValid()) { |
| | vector<int> ridx(_num_imgpt); |
| | for (int i = 0; i < _num_imgpt; ++i) ridx[cpidx[i]] = i; |
| | _cuCameraMeasurementListT.CopyFromHost(&ridx[0]); |
| | } |
| | if (_num_imgpt_q > 0) { |
| | _cuCameraQMap.CopyFromHost(&qmap[0]); |
| | _cuCameraQMapW.CopyFromHost(&qmapw[0]); |
| | _cuCameraQList.CopyFromHost(&qlist[0]); |
| | _cuCameraQListW.CopyFromHost(&qlistw[0]); |
| | } |
| | BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); |
| |
|
| | |
| | |
| | BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); |
| | vector<int> ppi(_num_point + 1); |
| | for (int i = 0, last_point = -1; i < _num_imgpt; ++i) { |
| | int pt = _point_idx[i]; |
| | while (last_point < pt) ppi[++last_point] = i; |
| | } |
| | ppi[_num_point] = _num_imgpt; |
| |
|
| | |
| | vector<int> projection_map(_num_imgpt * 2); |
| | for (int i = 0; i < _num_imgpt; ++i) { |
| | int* imp = &projection_map[i * 2]; |
| | imp[0] = _camera_idx[i] * 2; |
| | imp[1] = _point_idx[i]; |
| | } |
| | BundleTimerSwap(TIMER_PREPROCESSING, TIMER_GPU_ALLOCATION); |
| |
|
| | |
| | BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); |
| | _cuPointMeasurementMap.CopyFromHost(&ppi[0]); |
| | _cuProjectionMap.CopyFromHost(&projection_map[0]); |
| | BundleTimerSwap(TIMER_GPU_UPLOAD, TIMER_GPU_ALLOCATION); |
| | } |
| |
|
| | __memory_usage = total_sz; |
| | if (__verbose_level > 1) |
| | std::cout << "Memory for Motion/Structure/Jacobian:\t" |
| | << (total_sz / 1024 / 1024) << "MB\n"; |
| | return success; |
| | } |
| |
|
| | bool SparseBundleCU::ProcessIndexCameraQ(vector<int>& qmap, |
| | vector<int>& qlist) { |
| | |
| | qmap.resize(0); |
| | qlist.resize(0); |
| | _num_imgpt_q = 0; |
| |
|
| | |
| | if (_camera_idx == NULL) return true; |
| | if (_point_idx == NULL) return true; |
| | if (_focal_mask == NULL) return true; |
| | if (_num_camera == 0) return true; |
| | if (_weight_q <= 0) return true; |
| |
|
| | |
| |
|
| | int error = 0; |
| | vector<int> temp(_num_camera * 2, -1); |
| |
|
| | for (int i = 0; i < _num_camera; ++i) { |
| | int iq = _focal_mask[i]; |
| | if (iq > i) { |
| | error = 1; |
| | break; |
| | } |
| | if (iq < 0) continue; |
| | if (iq == i) continue; |
| | int ip = temp[2 * iq]; |
| | |
| | |
| | |
| | |
| | |
| | |
| | if (_focal_mask[iq] != iq) { |
| | error = 1; |
| | break; |
| | } else if (ip == -1) { |
| | temp[2 * iq] = i; |
| | temp[2 * iq + 1] = i; |
| | temp[2 * i] = iq; |
| | temp[2 * i + 1] = iq; |
| | } else { |
| | |
| | temp[2 * i] = ip; |
| | temp[2 * i + 1] = iq; |
| | temp[2 * ip + 1] = i; |
| | temp[2 * iq] = i; |
| | } |
| | } |
| |
|
| | if (error) { |
| | std::cout << "Error: incorrect constraints\n"; |
| | _focal_mask = NULL; |
| | return false; |
| | } |
| |
|
| | qlist.resize(_num_camera * 2, -1); |
| | for (int i = 0; i < _num_camera; ++i) { |
| | int inext = temp[2 * i + 1]; |
| | if (inext == -1) continue; |
| | qlist[2 * i] = _num_imgpt + _num_imgpt_q; |
| | qlist[2 * inext + 1] = _num_imgpt + _num_imgpt_q; |
| | qmap.push_back(i); |
| | qmap.push_back(inext); |
| | _num_imgpt_q++; |
| | } |
| | return true; |
| | } |
| |
|
| | void SparseBundleCU::ProcessWeightCameraQ(vector<int>& cpnum, vector<int>& qmap, |
| | vector<float>& qmapw, |
| | vector<float>& qlistw) { |
| | |
| | vector<float> qpnum(_num_camera, 0), qcnum(_num_camera, 0); |
| | vector<float> fs(_num_camera, 0), rs(_num_camera, 0); |
| |
|
| | for (int i = 0; i < _num_camera; ++i) { |
| | int qi = _focal_mask[i]; |
| | if (qi == -1) continue; |
| | |
| | |
| | fs[qi] += _camera_data[i].f; |
| | rs[qi] += _camera_data[i].radial; |
| | qpnum[qi] += cpnum[i]; |
| | qcnum[qi] += 1.0f; |
| | } |
| |
|
| | |
| | for (int i = 0; i < _num_camera; ++i) { |
| | int qi = _focal_mask[i]; |
| | if (qi == -1) continue; |
| | |
| | |
| | _camera_data[i].f = fs[qi] / qcnum[qi]; |
| | _camera_data[i].radial = rs[qi] / qcnum[qi]; |
| | } |
| |
|
| | qmapw.resize(_num_imgpt_q * 2, 0); |
| | qlistw.resize(_num_camera * 2, 0); |
| | for (int i = 0; i < _num_imgpt_q; ++i) { |
| | int cidx = qmap[i * 2], qi = _focal_mask[cidx]; |
| | float wi = sqrt(qpnum[qi] / qcnum[qi]) * _weight_q; |
| | float wr = (__use_radial_distortion ? wi * _camera_data[qi].f : 0.0); |
| | qmapw[i * 2] = wi; |
| | qmapw[i * 2 + 1] = wr; |
| | qlistw[cidx * 2] = wi; |
| | qlistw[cidx * 2 + 1] = wr; |
| | } |
| | } |
| |
|
| | void SparseBundleCU::ReleaseAllocatedData() { |
| | _cuCameraData.ReleaseData(); |
| | _cuCameraDataEX.ReleaseData(); |
| | _cuPointData.ReleaseData(); |
| | _cuPointDataEX.ReleaseData(); |
| | _cuMeasurements.ReleaseData(); |
| | _cuImageProj.ReleaseData(); |
| | _cuJacobianCamera.ReleaseData(); |
| | _cuJacobianPoint.ReleaseData(); |
| | _cuJacobianCameraT.ReleaseData(); |
| | _cuProjectionMap.ReleaseData(); |
| | _cuPointMeasurementMap.ReleaseData(); |
| | _cuCameraMeasurementMap.ReleaseData(); |
| | _cuCameraMeasurementList.ReleaseData(); |
| | _cuCameraMeasurementListT.ReleaseData(); |
| | _cuBufferData.ReleaseData(); |
| | _cuBlockPC.ReleaseData(); |
| | _cuVectorJtE.ReleaseData(); |
| | _cuVectorJJ.ReleaseData(); |
| | _cuVectorJX.ReleaseData(); |
| | _cuVectorXK.ReleaseData(); |
| | _cuVectorPK.ReleaseData(); |
| | _cuVectorZK.ReleaseData(); |
| | _cuVectorRK.ReleaseData(); |
| | _cuVectorSJ.ReleaseData(); |
| | _cuCameraQList.ReleaseData(); |
| | _cuCameraQMap.ReleaseData(); |
| | _cuCameraQMapW.ReleaseData(); |
| | _cuCameraQListW.ReleaseData(); |
| | ProgramCU::ResetCurrentDevice(); |
| | } |
| |
|
| | void SparseBundleCU::NormalizeDataF() { |
| | int incompatible_radial_distortion = 0; |
| | if (__focal_normalize) { |
| | if (__focal_scaling == 1.0f) { |
| | |
| | |
| | vector<float> focals(_num_camera); |
| | for (int i = 0; i < _num_camera; ++i) focals[i] = _camera_data[i].f; |
| | std::nth_element(focals.begin(), focals.begin() + _num_camera / 2, |
| | focals.end()); |
| | float median_focal_length = focals[_num_camera / 2]; |
| | __focal_scaling = __data_normalize_median / median_focal_length; |
| | float radial_factor = median_focal_length * median_focal_length * 4.0f; |
| |
|
| | |
| | _imgpt_datax.resize(_num_imgpt * 2); |
| | for (int i = 0; i < _num_imgpt * 2; ++i) |
| | _imgpt_datax[i] = _imgpt_data[i] * __focal_scaling; |
| | for (int i = 0; i < _num_camera; ++i) { |
| | _camera_data[i].f *= __focal_scaling; |
| | if (!__use_radial_distortion) { |
| | } else if (__reset_initial_distortion) { |
| | _camera_data[i].radial = 0; |
| | } else if (_camera_data[i].distortion_type != __use_radial_distortion) { |
| | incompatible_radial_distortion++; |
| | _camera_data[i].radial = 0; |
| | } else if (__use_radial_distortion == -1) { |
| | _camera_data[i].radial *= radial_factor; |
| | } |
| | } |
| | if (__verbose_level > 2) |
| | std::cout << "Focal length normalized by " << __focal_scaling << '\n'; |
| | __reset_initial_distortion = false; |
| | } |
| | } else { |
| | if (__use_radial_distortion) { |
| | for (int i = 0; i < _num_camera; ++i) { |
| | if (__reset_initial_distortion) { |
| | _camera_data[i].radial = 0; |
| | } else if (_camera_data[i].distortion_type != __use_radial_distortion) { |
| | _camera_data[i].radial = 0; |
| | incompatible_radial_distortion++; |
| | } |
| | } |
| | __reset_initial_distortion = false; |
| | } |
| | _imgpt_datax.resize(0); |
| | } |
| |
|
| | if (incompatible_radial_distortion) { |
| | std::cout << "ERROR: incompatible radial distortion input; reset to 0;\n"; |
| | } |
| | } |
| |
|
| | void SparseBundleCU::NormalizeDataD() { |
| | if (__depth_scaling == 1.0f) { |
| | const float dist_bound = 1.0f; |
| | vector<float> oz(_num_imgpt); |
| | vector<float> cpdist1(_num_camera, dist_bound); |
| | vector<float> cpdist2(_num_camera, -dist_bound); |
| | vector<int> camnpj(_num_camera, 0), cambpj(_num_camera, 0); |
| | int bad_point_count = 0; |
| | for (int i = 0; i < _num_imgpt; ++i) { |
| | int cmidx = _camera_idx[i]; |
| | CameraT* cam = _camera_data + cmidx; |
| | float* rz = cam->m[2]; |
| | float* x = _point_data + 4 * _point_idx[i]; |
| | oz[i] = (rz[0] * x[0] + rz[1] * x[1] + rz[2] * x[2] + cam->t[2]); |
| |
|
| | |
| | |
| | float ozr = oz[i] / cam->t[2]; |
| | if (fabs(ozr) < __depth_check_epsilon) { |
| | bad_point_count++; |
| | float px = cam->f * (cam->m[0][0] * x[0] + cam->m[0][1] * x[1] + |
| | cam->m[0][2] * x[2] + cam->t[0]); |
| | float py = cam->f * (cam->m[1][0] * x[0] + cam->m[1][1] * x[1] + |
| | cam->m[1][2] * x[2] + cam->t[1]); |
| | float mx = _imgpt_data[i * 2], my = _imgpt_data[2 * i + 1]; |
| | bool checkx = fabs(mx) > fabs(my); |
| | if ((checkx && px * oz[i] * mx < 0 && fabs(mx) > 64) || |
| | (!checkx && py * oz[i] * my < 0 && fabs(my) > 64)) { |
| | if (__verbose_level > 3) |
| | std::cout << "Warning: proj of #" << cmidx |
| | << " on the wrong side, oz = " << oz[i] << " (" |
| | << (px / oz[i]) << ',' << (py / oz[i]) << ") (" << mx |
| | << ',' << my << ")\n"; |
| | |
| | if (oz[i] > 0) |
| | cpdist2[cmidx] = 0; |
| | else |
| | cpdist1[cmidx] = 0; |
| | } |
| | if (oz[i] >= 0) |
| | cpdist1[cmidx] = std::min(cpdist1[cmidx], oz[i]); |
| | else |
| | cpdist2[cmidx] = std::max(cpdist2[cmidx], oz[i]); |
| | } |
| | if (oz[i] < 0) { |
| | __num_point_behind++; |
| | cambpj[cmidx]++; |
| | } |
| | camnpj[cmidx]++; |
| | } |
| | if (bad_point_count > 0 && __depth_degeneracy_fix) { |
| | if (!__focal_normalize || !__depth_normalize) |
| | std::cout << "Enable data normalization on degeneracy\n"; |
| | __focal_normalize = true; |
| | __depth_normalize = true; |
| | } |
| | if (__depth_normalize) { |
| | std::nth_element(oz.begin(), oz.begin() + _num_imgpt / 2, oz.end()); |
| | float oz_median = oz[_num_imgpt / 2]; |
| | float shift_min = std::min(oz_median * 0.001f, 1.0f); |
| | float dist_threshold = shift_min * 0.1f; |
| | __depth_scaling = (1.0f / oz_median) / __data_normalize_median; |
| | if (__verbose_level > 2) |
| | std::cout << "Depth normalized by " << __depth_scaling << " (" |
| | << oz_median << ")\n"; |
| |
|
| | for (int i = 0; i < _num_camera; ++i) { |
| | |
| | if (!__depth_degeneracy_fix) { |
| | } else if ((cpdist1[i] < dist_threshold || |
| | cpdist2[i] > -dist_threshold)) { |
| | float shift = shift_min; |
| | |
| | |
| | |
| | bool boths = |
| | cpdist1[i] < dist_threshold && cpdist2[i] > -dist_threshold; |
| | _camera_data[i].t[2] += shift; |
| | if (__verbose_level > 3) |
| | std::cout << "Adjust C" << std::setw(5) << i << " by " |
| | << std::setw(12) << shift << " [B" << std::setw(2) |
| | << cambpj[i] << "/" << std::setw(5) << camnpj[i] << "] [" |
| | << (boths ? 'X' : ' ') << "][" << cpdist1[i] << ", " |
| | << cpdist2[i] << "]\n"; |
| | __num_camera_modified++; |
| | } |
| | _camera_data[i].t[0] *= __depth_scaling; |
| | _camera_data[i].t[1] *= __depth_scaling; |
| | _camera_data[i].t[2] *= __depth_scaling; |
| | } |
| | for (int i = 0; i < _num_point; ++i) { |
| | |
| | _point_data[4 * i + 0] *= __depth_scaling; |
| | _point_data[4 * i + 1] *= __depth_scaling; |
| | _point_data[4 * i + 2] *= __depth_scaling; |
| | } |
| | } |
| | if (__num_point_behind > 0) |
| | std::cout << "WARNING: " << __num_point_behind |
| | << " points are behind cameras.\n"; |
| | if (__num_camera_modified > 0) |
| | std::cout << "WARNING: " << __num_camera_modified |
| | << " camera moved to avoid degeneracy.\n"; |
| | } |
| | } |
| |
|
| | void SparseBundleCU::NormalizeData() { |
| | TimerBA timer(this, TIMER_PREPROCESSING); |
| | NormalizeDataD(); |
| | NormalizeDataF(); |
| | } |
| |
|
| | void SparseBundleCU::DenormalizeData() { |
| | if (__focal_normalize && __focal_scaling != 1.0f) { |
| | float squared_focal_factor = (__focal_scaling * __focal_scaling); |
| | for (int i = 0; i < _num_camera; ++i) { |
| | _camera_data[i].f /= __focal_scaling; |
| | if (__use_radial_distortion == -1) |
| | _camera_data[i].radial *= squared_focal_factor; |
| | _camera_data[i].distortion_type = __use_radial_distortion; |
| | } |
| | _projection_sse /= squared_focal_factor; |
| | __focal_scaling = 1.0f; |
| | _imgpt_datax.resize(0); |
| | } else if (__use_radial_distortion) { |
| | for (int i = 0; i < _num_camera; ++i) |
| | _camera_data[i].distortion_type = __use_radial_distortion; |
| | } |
| |
|
| | if (__depth_normalize && __depth_scaling != 1.0f) { |
| | for (int i = 0; i < _num_camera; ++i) { |
| | _camera_data[i].t[0] /= __depth_scaling; |
| | _camera_data[i].t[1] /= __depth_scaling; |
| | _camera_data[i].t[2] /= __depth_scaling; |
| | } |
| | for (int i = 0; i < _num_point; ++i) { |
| | _point_data[4 * i + 0] /= __depth_scaling; |
| | _point_data[4 * i + 1] /= __depth_scaling; |
| | _point_data[4 * i + 2] /= __depth_scaling; |
| | } |
| | __depth_scaling = 1.0f; |
| | } |
| | } |
| |
|
| | void SparseBundleCU::TransferDataToHost() { |
| | TimerBA timer(this, TIMER_GPU_DOWNLOAD); |
| | _cuCameraData.CopyToHost(_camera_data); |
| | _cuPointData.CopyToHost(_point_data); |
| | } |
| |
|
| | float SparseBundleCU::EvaluateProjection(CuTexImage& cam, CuTexImage& point, |
| | CuTexImage& proj) { |
| | ++__num_projection_eval; |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_PJ, true); |
| | ComputeProjection(cam, point, _cuMeasurements, _cuProjectionMap, proj, |
| | __use_radial_distortion); |
| | if (_num_imgpt_q > 0) |
| | ComputeProjectionQ(cam, _cuCameraQMap, _cuCameraQMapW, proj, _num_imgpt); |
| | return (float)ComputeVectorNorm(proj, _cuBufferData); |
| | } |
| |
|
| | float SparseBundleCU::EvaluateProjectionX(CuTexImage& cam, CuTexImage& point, |
| | CuTexImage& proj) { |
| | ++__num_projection_eval; |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_PJ, true); |
| | ComputeProjectionX(cam, point, _cuMeasurements, _cuProjectionMap, proj, |
| | __use_radial_distortion); |
| | if (_num_imgpt_q > 0) |
| | ComputeProjectionQ(cam, _cuCameraQMap, _cuCameraQMapW, proj, _num_imgpt); |
| | return (float)ComputeVectorNorm(proj, _cuBufferData); |
| | } |
| |
|
| | void SparseBundleCU::DebugProjections() { |
| | double e1 = 0, e2 = 0; |
| | for (int i = 0; i < _num_imgpt; ++i) { |
| | float* c = (float*)(_camera_data + _camera_idx[i]); |
| | float* p = _point_data + 4 * _point_idx[i]; |
| | const float* m = _imgpt_datax.size() > 0 ? (&_imgpt_datax[i * 2]) |
| | : (_imgpt_data + 2 * i); |
| | float* r = c + 4; |
| | float* t = c + 1; |
| | float dx1, dy1; |
| | |
| | float z = r[6] * p[0] + r[7] * p[1] + r[8] * p[2] + t[2]; |
| | float xx = (r[0] * p[0] + r[1] * p[1] + r[2] * p[2] + t[0]); |
| | float yy = (r[3] * p[0] + r[4] * p[1] + r[5] * p[2] + t[1]); |
| | float x = xx / z; |
| | float y = yy / z; |
| | if (__use_radial_distortion == -1) { |
| | float rn = (m[0] * m[0] + m[1] * m[1]) * c[13] + 1.0f; |
| | dx1 = c[0] * x - m[0] * rn; |
| | dy1 = c[0] * y - m[1] * rn; |
| | e1 += (dx1 * dx1 + dy1 * dy1); |
| | e2 += (dx1 * dx1 + dy1 * dy1) / (rn * rn); |
| | } else if (__use_radial_distortion) { |
| | float rn = (x * x + y * y) * c[13] + 1.0f; |
| | dx1 = c[0] * x * rn - m[0]; |
| | dy1 = c[0] * y * rn - m[1]; |
| | e1 += (dx1 * dx1 + dy1 * dy1) / (rn * rn); |
| | e2 += (dx1 * dx1 + dy1 * dy1); |
| | } else { |
| | dx1 = c[0] * x - m[0]; |
| | dy1 = c[0] * y - m[1]; |
| | e1 += (dx1 * dx1 + dy1 * dy1); |
| | e2 += (dx1 * dx1 + dy1 * dy1); |
| | } |
| | if (!isfinite(dx1) || !isfinite(dy1)) |
| | std::cout << "x = " << xx << " y = " << yy << " z = " << z << '\n' |
| | << "t0 = " << t[0] << " t1 = " << t[1] << " t2 = " << t[2] |
| | << '\n' << "p0 = " << p[0] << " p1 = " << p[1] |
| | << " p2 = " << p[2] << '\n'; |
| | } |
| | e1 = e1 / (__focal_scaling * __focal_scaling) / _num_imgpt; |
| | e2 = e2 / (__focal_scaling * __focal_scaling) / _num_imgpt; |
| | std::cout << "DEBUG: mean squared error = " << e1 |
| | << " in undistorted domain;\n"; |
| | std::cout << "DEBUG: mean squared error = " << e2 |
| | << " in distorted domain.\n"; |
| | } |
| |
|
| | bool SparseBundleCU::InitializeStorageForCG() { |
| | bool success = true; |
| | size_t total_sz = 0; |
| | int plen = GetParameterLength(); |
| |
|
| | |
| | ALLOCATE_REQUIRED_DATA(_cuVectorJtE, plen, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuVectorXK, plen, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuVectorPK, plen, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuVectorRK, plen, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuVectorJJ, plen, 1); |
| | ALLOCATE_REQUIRED_DATA(_cuVectorZK, plen, 1); |
| |
|
| | |
| | unsigned int cblock_len = (__use_radial_distortion ? 64 : 56); |
| | ALLOCATE_REQUIRED_DATA(_cuBlockPC, _num_camera * cblock_len + 12 * _num_point, |
| | 1); |
| | if (__accurate_gain_ratio) { |
| | ALLOCATE_REQUIRED_DATA(_cuVectorJX, _num_imgpt + _num_imgpt_q, 2); |
| | } else { |
| | _cuVectorJX.SetTexture(_cuImageProj.data(), _num_imgpt + _num_imgpt_q, 2); |
| | } |
| | ALLOCATE_OPTIONAL_DATA(_cuVectorSJ, plen, 1, __jacobian_normalize); |
| |
|
| | |
| | __memory_usage += total_sz; |
| | if (__verbose_level > 1) |
| | std::cout << "Memory for Conjugate Gradient Solver:\t" |
| | << (total_sz / 1024 / 1024) << "MB\n"; |
| | return success; |
| | } |
| |
|
| | void SparseBundleCU::PrepareJacobianNormalization() { |
| | if (!_cuVectorSJ.IsValid()) return; |
| |
|
| | if ((__jc_store_transpose || __jc_store_original) && |
| | _cuJacobianPoint.IsValid() && !__bundle_current_mode) { |
| | CuTexImage null; |
| | null.SwapData(_cuVectorSJ); |
| | EvaluateJacobians(); |
| | null.SwapData(_cuVectorSJ); |
| | ComputeDiagonal(_cuVectorJJ, _cuVectorSJ); |
| | ComputeSQRT(_cuVectorSJ); |
| | } else { |
| | CuTexImage null; |
| | null.SwapData(_cuVectorSJ); |
| | EvaluateJacobians(); |
| | ComputeBlockPC(0, true); |
| | null.SwapData(_cuVectorSJ); |
| | _cuVectorJJ.SwapData(_cuVectorSJ); |
| | ProgramCU::ComputeRSQRT(_cuVectorSJ); |
| | } |
| | } |
| |
|
| | void SparseBundleCU::EvaluateJacobians(bool shuffle) { |
| | if (__no_jacobian_store) return; |
| | if (__bundle_current_mode == BUNDLE_ONLY_MOTION && !__jc_store_original && |
| | !__jc_store_transpose) |
| | return; |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_JJ, true); |
| |
|
| | if (__jc_store_original || !__jc_store_transpose) { |
| | ComputeJacobian(_cuCameraData, _cuPointData, _cuJacobianCamera, |
| | _cuJacobianPoint, _cuProjectionMap, _cuVectorSJ, |
| | _cuMeasurements, _cuCameraMeasurementList, |
| | __fixed_intrinsics, __use_radial_distortion, false); |
| | if (shuffle && __jc_store_transpose && _cuJacobianCameraT.IsValid()) |
| | ShuffleCameraJacobian(_cuJacobianCamera, _cuCameraMeasurementList, |
| | _cuJacobianCameraT); |
| | } else { |
| | ComputeJacobian(_cuCameraData, _cuPointData, _cuJacobianCameraT, |
| | _cuJacobianPoint, _cuProjectionMap, _cuVectorSJ, |
| | _cuMeasurements, _cuCameraMeasurementListT, |
| | __fixed_intrinsics, __use_radial_distortion, true); |
| | } |
| | ++__num_jacobian_eval; |
| | } |
| |
|
| | void SparseBundleCU::ComputeJtE(CuTexImage& E, CuTexImage& JtE, int mode) { |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_JTE, true); |
| | if (mode == 0) mode = __bundle_current_mode; |
| | if (__no_jacobian_store || (!__jc_store_original && !__jc_store_transpose)) { |
| | ProgramCU::ComputeJtE_(E, JtE, _cuCameraData, _cuPointData, _cuMeasurements, |
| | _cuCameraMeasurementMap, _cuCameraMeasurementList, |
| | _cuPointMeasurementMap, _cuProjectionMap, |
| | _cuJacobianPoint, __fixed_intrinsics, |
| | __use_radial_distortion, mode); |
| |
|
| | |
| | if (!_cuVectorSJ.IsValid()) { |
| | } else if (mode == 2) { |
| | if (!_cuJacobianPoint.IsValid()) |
| | ComputeVXY(JtE, _cuVectorSJ, JtE, _num_point * 4, _num_camera * 8); |
| | } else if (mode == 1) |
| | ComputeVXY(JtE, _cuVectorSJ, JtE, _num_camera * 8); |
| | else |
| | ComputeVXY(JtE, _cuVectorSJ, JtE, |
| | _cuJacobianPoint.IsValid() ? _num_camera * 8 : 0); |
| |
|
| | } else if (__jc_store_transpose) { |
| | ProgramCU::ComputeJtE(E, _cuJacobianCameraT, _cuCameraMeasurementMap, |
| | _cuCameraMeasurementList, _cuJacobianPoint, |
| | _cuPointMeasurementMap, JtE, true, mode); |
| | } else { |
| | ProgramCU::ComputeJtE(E, _cuJacobianCamera, _cuCameraMeasurementMap, |
| | _cuCameraMeasurementList, _cuJacobianPoint, |
| | _cuPointMeasurementMap, JtE, false, mode); |
| | } |
| |
|
| | if (mode != 2 && _num_imgpt_q > 0) |
| | ProgramCU::ComputeJQtEC(E, _cuCameraQList, _cuCameraQListW, _cuVectorSJ, |
| | JtE); |
| | } |
| |
|
| | void SparseBundleCU::SaveBundleRecord(int iter, float res, float damping, |
| | float& g_norm, float& g_inf) { |
| | |
| | |
| | g_inf = |
| | __lm_check_gradient ? ComputeVectorMax(_cuVectorJtE, _cuBufferData) : 0; |
| | g_norm = __save_gradient_norm |
| | ? float(ComputeVectorNorm(_cuVectorJtE, _cuBufferData)) |
| | : g_inf; |
| | ConfigBA::SaveBundleRecord(iter, res, damping, g_norm, g_inf); |
| | } |
| |
|
| | void SparseBundleCU::ComputeJX(CuTexImage& X, CuTexImage& JX, int mode) { |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_JX, true); |
| | if (__no_jacobian_store || (__multiply_jx_usenoj && mode != 2) || |
| | !__jc_store_original) { |
| | if (_cuVectorSJ.IsValid()) { |
| | if (mode == 0) |
| | ProgramCU::ComputeVXY(X, _cuVectorSJ, _cuVectorZK); |
| | else if (mode == 1) |
| | ProgramCU::ComputeVXY(X, _cuVectorSJ, _cuVectorZK, _num_camera * 8); |
| | else if (mode == 2) |
| | ProgramCU::ComputeVXY(X, _cuVectorSJ, _cuVectorZK, _num_point * 4, |
| | _num_camera * 8); |
| | ProgramCU::ComputeJX_(_cuVectorZK, JX, _cuCameraData, _cuPointData, |
| | _cuMeasurements, _cuProjectionMap, |
| | __fixed_intrinsics, __use_radial_distortion, mode); |
| | } else { |
| | ProgramCU::ComputeJX_(X, JX, _cuCameraData, _cuPointData, _cuMeasurements, |
| | _cuProjectionMap, __fixed_intrinsics, |
| | __use_radial_distortion, mode); |
| | } |
| | } else { |
| | ProgramCU::ComputeJX(_num_camera * 2, X, _cuJacobianCamera, |
| | _cuJacobianPoint, _cuProjectionMap, JX, mode); |
| | } |
| |
|
| | if (_num_imgpt_q > 0 && mode != 2) { |
| | ProgramCU::ComputeJQX(X, _cuCameraQMap, _cuCameraQMapW, _cuVectorSJ, JX, |
| | _num_imgpt); |
| | } |
| | } |
| |
|
| | void SparseBundleCU::ComputeBlockPC(float lambda, bool dampd) { |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_BC, true); |
| |
|
| | bool use_diagonal_q = _cuCameraQListW.IsValid() && __bundle_current_mode != 2; |
| | if (use_diagonal_q) |
| | ComputeDiagonalQ(_cuCameraQListW, _cuVectorSJ, _cuVectorJJ); |
| |
|
| | if (__no_jacobian_store || (!__jc_store_original && !__jc_store_transpose)) { |
| | ComputeDiagonalBlock_( |
| | lambda, dampd, _cuCameraData, _cuPointData, _cuMeasurements, |
| | _cuCameraMeasurementMap, _cuCameraMeasurementList, |
| | _cuPointMeasurementMap, _cuProjectionMap, _cuJacobianPoint, _cuVectorSJ, |
| | _cuVectorJJ, _cuBlockPC, __fixed_intrinsics, __use_radial_distortion, |
| | use_diagonal_q, __bundle_current_mode); |
| | } else if (__jc_store_transpose) { |
| | ComputeDiagonalBlock(lambda, dampd, _cuJacobianCameraT, |
| | _cuCameraMeasurementMap, _cuJacobianPoint, |
| | _cuPointMeasurementMap, _cuCameraMeasurementList, |
| | _cuVectorJJ, _cuBlockPC, __use_radial_distortion, true, |
| | use_diagonal_q, __bundle_current_mode); |
| | } else { |
| | ComputeDiagonalBlock(lambda, dampd, _cuJacobianCamera, |
| | _cuCameraMeasurementMap, _cuJacobianPoint, |
| | _cuPointMeasurementMap, _cuCameraMeasurementList, |
| | _cuVectorJJ, _cuBlockPC, __use_radial_distortion, |
| | false, use_diagonal_q, __bundle_current_mode); |
| | } |
| | } |
| |
|
| | void SparseBundleCU::ApplyBlockPC(CuTexImage& v, CuTexImage& pv, int mode) { |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_MP, true); |
| | MultiplyBlockConditioner(_num_camera, _num_point, _cuBlockPC, v, pv, |
| | __use_radial_distortion, mode); |
| | } |
| |
|
| | void SparseBundleCU::ComputeDiagonal(CuTexImage& JJ, CuTexImage& JJI) { |
| | |
| | if (__no_jacobian_store) return; |
| | if (!__jc_store_transpose && !__jc_store_original) return; |
| |
|
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_DD, true); |
| | bool use_diagonal_q = _cuCameraQListW.IsValid(); |
| | if (use_diagonal_q) { |
| | CuTexImage null; |
| | ComputeDiagonalQ(_cuCameraQListW, null, JJ); |
| | } |
| | if (__jc_store_transpose) { |
| | ProgramCU::ComputeDiagonal(_cuJacobianCameraT, _cuCameraMeasurementMap, |
| | _cuJacobianPoint, _cuPointMeasurementMap, |
| | _cuCameraMeasurementList, JJ, JJI, true, |
| | __use_radial_distortion, use_diagonal_q); |
| | } else { |
| | ProgramCU::ComputeDiagonal(_cuJacobianCamera, _cuCameraMeasurementMap, |
| | _cuJacobianPoint, _cuPointMeasurementMap, |
| | _cuCameraMeasurementList, JJ, JJI, false, |
| | __use_radial_distortion, use_diagonal_q); |
| | } |
| | } |
| |
|
| | int SparseBundleCU::SolveNormalEquationPCGX(float lambda) { |
| | |
| | |
| | |
| | TimerBA timer(this, TIMER_CG_ITERATION); |
| | __recent_cg_status = ' '; |
| |
|
| | |
| | int plen = GetParameterLength(); |
| | CuTexImage null; |
| | CuTexImage& VectorDP = |
| | __lm_use_diagonal_damp ? _cuVectorJJ : null; |
| | ComputeBlockPC(lambda, __lm_use_diagonal_damp); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | CuTexImage r; |
| | r.SetTexture(_cuVectorRK.data(), 8 * _num_camera); |
| | CuTexImage p; |
| | p.SetTexture(_cuVectorPK.data(), 8 * _num_camera); |
| | CuTexImage z; |
| | z.SetTexture(_cuVectorZK.data(), 8 * _num_camera); |
| | CuTexImage x; |
| | x.SetTexture(_cuVectorXK.data(), 8 * _num_camera); |
| | CuTexImage d; |
| | d.SetTexture(VectorDP.data(), 8 * _num_camera); |
| |
|
| | CuTexImage& u = _cuVectorRK; |
| | CuTexImage& v = _cuVectorPK; |
| | CuTexImage up; |
| | up.SetTexture(u.data() + 8 * _num_camera, 4 * _num_point); |
| | CuTexImage vp; |
| | vp.SetTexture(v.data() + 8 * _num_camera, 4 * _num_point); |
| | CuTexImage uc; |
| | uc.SetTexture(z.data(), 8 * _num_camera); |
| |
|
| | CuTexImage& e = _cuVectorJX; |
| | CuTexImage& e2 = _cuImageProj; |
| |
|
| | ApplyBlockPC(_cuVectorJtE, u, 2); |
| | ComputeJX(u, e, 2); |
| | ComputeJtE(e, uc, 1); |
| | ComputeSAXPY(-1.0f, uc, _cuVectorJtE, r); |
| | ApplyBlockPC(r, p, 1); |
| |
|
| | float_t rtz0 = (float_t)ComputeVectorDot(r, p, _cuBufferData); |
| | |
| | ComputeJX(p, e, 1); |
| | ComputeJtE(e, u, 2); |
| | ApplyBlockPC(u, v, 2); |
| | float_t qtq0 = (float_t)ComputeVectorNorm(e, _cuBufferData); |
| | float_t pdp0 = |
| | (float_t)ComputeVectorNormW(p, d, _cuBufferData); |
| | float_t uv0 = (float_t)ComputeVectorDot(up, vp, _cuBufferData); |
| | float_t alpha0 = rtz0 / (qtq0 + lambda * pdp0 - uv0); |
| |
|
| | if (__verbose_cg_iteration) |
| | std::cout << " --0,\t alpha = " << alpha0 |
| | << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; |
| | if (!isfinite(alpha0)) { |
| | return 0; |
| | } |
| | if (alpha0 == 0) { |
| | __recent_cg_status = 'I'; |
| | return 1; |
| | } |
| |
|
| | |
| | ComputeSAX((float)alpha0, p, x); |
| | ComputeJX(v, e2, 2); |
| | ComputeSAXPY(-1.0f, e2, e, e); |
| | ComputeJtE(e, uc, 1); |
| | ComputeSXYPZ(lambda, d, p, uc, uc); |
| | ComputeSAXPY((float)-alpha0, uc, r, r); |
| |
|
| | |
| | float_t rtzk = rtz0, rtz_min = rtz0, betak; |
| | int iteration = 1; |
| | ++__num_cg_iteration; |
| |
|
| | while (true) { |
| | ApplyBlockPC(r, z, 1); |
| |
|
| | |
| | float_t rtzp = rtzk; |
| | rtzk = (float_t)ComputeVectorDot( |
| | r, z, _cuBufferData); |
| | float_t rtz_ratio = sqrt(fabs(rtzk / rtz0)); |
| |
|
| | if (rtz_ratio < __cg_norm_threshold) { |
| | if (__recent_cg_status == ' ') |
| | __recent_cg_status = iteration < std::min(10, __cg_min_iteration) |
| | ? '0' + iteration |
| | : 'N'; |
| | if (iteration >= __cg_min_iteration) break; |
| | } |
| | |
| | betak = rtzk / rtzp; |
| | rtz_min = std::min(rtz_min, rtzk); |
| |
|
| | ComputeSAXPY((float)betak, p, z, p); |
| | ComputeJX(p, e, 1); |
| | ComputeJtE(e, u, 2); |
| | ApplyBlockPC(u, v, 2); |
| | |
| |
|
| | float_t qtqk = (float_t)ComputeVectorNorm(e, _cuBufferData); |
| | float_t pdpk = |
| | (float_t)ComputeVectorNormW(p, d, _cuBufferData); |
| | float_t uvk = (float_t)ComputeVectorDot(up, vp, _cuBufferData); |
| | float_t alphak = rtzk / (qtqk + lambda * pdpk - uvk); |
| |
|
| | |
| | if (__verbose_cg_iteration) |
| | std::cout << " --" << iteration << ",\t alpha= " << alphak |
| | << ", rtzk/rtz0 = " << rtz_ratio |
| | << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; |
| |
|
| | |
| | if (!isfinite(alphak) || rtz_ratio > __cg_norm_guard) { |
| | __recent_cg_status = 'X'; |
| | break; |
| | } |
| |
|
| | |
| | ComputeSAXPY((float)alphak, p, x, x); |
| |
|
| | |
| | ++iteration; |
| | ++__num_cg_iteration; |
| | if (iteration >= std::min(__cg_max_iteration, plen)) break; |
| |
|
| | ComputeJX(v, e2, 2); |
| | ComputeSAXPY(-1.0f, e2, e, e); |
| | ComputeJtE(e, uc, 1); |
| | ComputeSXYPZ(lambda, d, p, uc, uc); |
| | ComputeSAXPY((float)-alphak, uc, r, r); |
| | } |
| |
|
| | |
| |
|
| | ComputeJX(x, e, 1); |
| | ComputeJtE(e, u, 2); |
| | CuTexImage jte_p; |
| | jte_p.SetTexture(_cuVectorJtE.data() + 8 * _num_camera, _num_point * 4); |
| | ComputeSAXPY(-1.0f, up, jte_p, vp); |
| | ApplyBlockPC(v, _cuVectorXK, 2); |
| | return iteration; |
| | } |
| | int SparseBundleCU::SolveNormalEquationPCGB(float lambda) { |
| | |
| | |
| | |
| | TimerBA timer(this, TIMER_CG_ITERATION); |
| | __recent_cg_status = ' '; |
| |
|
| | |
| | int plen = GetParameterLength(); |
| | CuTexImage null; |
| | CuTexImage& VectorDP = |
| | __lm_use_diagonal_damp ? _cuVectorJJ : null; |
| | CuTexImage& VectorQK = _cuVectorZK; |
| | ComputeBlockPC(lambda, __lm_use_diagonal_damp); |
| |
|
| | |
| | ApplyBlockPC(_cuVectorJtE, |
| | _cuVectorPK); |
| | ComputeJX(_cuVectorPK, _cuVectorJX); |
| |
|
| | |
| | float_t rtz0 = (float_t)ComputeVectorDot(_cuVectorJtE, _cuVectorPK, |
| | _cuBufferData); |
| | float_t qtq0 = |
| | (float_t)ComputeVectorNorm(_cuVectorJX, _cuBufferData); |
| | float_t ptdp0 = (float_t)ComputeVectorNormW( |
| | _cuVectorPK, VectorDP, _cuBufferData); |
| | float_t alpha0 = rtz0 / (qtq0 + lambda * ptdp0); |
| |
|
| | if (__verbose_cg_iteration) |
| | std::cout << " --0,\t alpha = " << alpha0 |
| | << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; |
| | if (!isfinite(alpha0)) { |
| | return 0; |
| | } |
| | if (alpha0 == 0) { |
| | __recent_cg_status = 'I'; |
| | return 1; |
| | } |
| |
|
| | |
| | ComputeSAX((float)alpha0, _cuVectorPK, |
| | _cuVectorXK); |
| | ComputeJtE(_cuVectorJX, VectorQK); |
| |
|
| | ComputeSXYPZ(lambda, VectorDP, _cuVectorPK, VectorQK, |
| | VectorQK); |
| | ComputeSAXPY( |
| | (float)-alpha0, VectorQK, _cuVectorJtE, |
| | _cuVectorRK); |
| |
|
| | float_t rtzk = rtz0, rtz_min = rtz0, betak; |
| | int iteration = 1; |
| | ++__num_cg_iteration; |
| |
|
| | while (true) { |
| | ApplyBlockPC(_cuVectorRK, _cuVectorZK); |
| |
|
| | |
| | float_t rtzp = rtzk; |
| | rtzk = (float_t)ComputeVectorDot( |
| | _cuVectorRK, _cuVectorZK, |
| | _cuBufferData); |
| | float_t rtz_ratio = sqrt(fabs(rtzk / rtz0)); |
| | if (rtz_ratio < __cg_norm_threshold) { |
| | if (__recent_cg_status == ' ') |
| | __recent_cg_status = iteration < std::min(10, __cg_min_iteration) |
| | ? '0' + iteration |
| | : 'N'; |
| | if (iteration >= __cg_min_iteration) break; |
| | } |
| |
|
| | |
| | betak = rtzk / rtzp; |
| | rtz_min = std::min(rtz_min, rtzk); |
| |
|
| | ComputeSAXPY((float)betak, _cuVectorPK, _cuVectorZK, |
| | _cuVectorPK); |
| | ComputeJX(_cuVectorPK, _cuVectorJX); |
| | |
| |
|
| | float_t qtqk = |
| | (float_t)ComputeVectorNorm(_cuVectorJX, _cuBufferData); |
| | float_t ptdpk = (float_t)ComputeVectorNormW( |
| | _cuVectorPK, VectorDP, _cuBufferData); |
| | float_t alphak = rtzk / (qtqk + lambda * ptdpk); |
| |
|
| | |
| | if (__verbose_cg_iteration) |
| | std::cout << " --" << iteration << ",\t alpha= " << alphak |
| | << ", rtzk/rtz0 = " << rtz_ratio |
| | << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n"; |
| |
|
| | |
| | if (!isfinite(alphak) || rtz_ratio > __cg_norm_guard) { |
| | __recent_cg_status = 'X'; |
| | break; |
| | } |
| |
|
| | |
| | ComputeSAXPY((float)alphak, _cuVectorPK, _cuVectorXK, |
| | _cuVectorXK); |
| |
|
| | |
| | ++iteration; |
| | ++__num_cg_iteration; |
| | if (iteration >= std::min(__cg_max_iteration, plen)) break; |
| |
|
| | |
| | if (__cg_recalculate_freq > 0 && iteration % __cg_recalculate_freq == 0) { |
| | |
| | ComputeJX(_cuVectorXK, _cuVectorJX); |
| | ComputeJtE(_cuVectorJX, VectorQK); |
| | ComputeSXYPZ(lambda, VectorDP, _cuVectorXK, VectorQK, VectorQK); |
| | ComputeSAXPY(-1.0f, VectorQK, _cuVectorJtE, _cuVectorRK); |
| | } else { |
| | ComputeJtE(_cuVectorJX, VectorQK); |
| | ComputeSXYPZ(lambda, VectorDP, _cuVectorPK, VectorQK, VectorQK); |
| | ComputeSAXPY( |
| | (float)-alphak, VectorQK, _cuVectorRK, |
| | _cuVectorRK); |
| | } |
| | } |
| | return iteration; |
| | } |
| |
|
| | int SparseBundleCU::SolveNormalEquation(float lambda) { |
| | if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { |
| | ComputeBlockPC(lambda, __lm_use_diagonal_damp); |
| | ApplyBlockPC(_cuVectorJtE, _cuVectorXK, 1); |
| | return 1; |
| | } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { |
| | ComputeBlockPC(lambda, __lm_use_diagonal_damp); |
| | ApplyBlockPC(_cuVectorJtE, _cuVectorXK, 2); |
| | return 1; |
| | } else { |
| | |
| | return __cg_schur_complement ? SolveNormalEquationPCGX(lambda) |
| | : SolveNormalEquationPCGB(lambda); |
| | } |
| | } |
| |
|
| | void SparseBundleCU::RunTestIterationLM(bool reduced) { |
| | EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); |
| | EvaluateJacobians(); |
| | ComputeJtE(_cuImageProj, _cuVectorJtE); |
| | if (reduced) |
| | SolveNormalEquationPCGX(__lm_initial_damp); |
| | else |
| | SolveNormalEquationPCGB(__lm_initial_damp); |
| | UpdateCameraPoint(_cuVectorZK, _cuImageProj); |
| | ComputeVectorDot(_cuVectorXK, _cuVectorJtE, _cuBufferData); |
| | ComputeJX(_cuVectorXK, _cuVectorJX); |
| | ComputeVectorNorm(_cuVectorJX, _cuBufferData); |
| | } |
| |
|
| | float SparseBundleCU::UpdateCameraPoint(CuTexImage& dx, |
| | CuTexImage& cuImageTempProj) { |
| | ConfigBA::TimerBA timer(this, TIMER_FUNCTION_UP, true); |
| | if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { |
| | if (__jacobian_normalize) |
| | ComputeVXY(_cuVectorXK, _cuVectorSJ, dx, 8 * _num_camera); |
| | ProgramCU::UpdateCameraPoint(_num_camera, _cuCameraData, _cuPointData, dx, |
| | _cuCameraDataEX, _cuPointDataEX, |
| | __bundle_current_mode); |
| | return EvaluateProjection(_cuCameraDataEX, _cuPointData, cuImageTempProj); |
| | } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { |
| | if (__jacobian_normalize) |
| | ComputeVXY(_cuVectorXK, _cuVectorSJ, dx, 4 * _num_point, 8 * _num_camera); |
| | ProgramCU::UpdateCameraPoint(_num_camera, _cuCameraData, _cuPointData, dx, |
| | _cuCameraDataEX, _cuPointDataEX, |
| | __bundle_current_mode); |
| | return EvaluateProjection(_cuCameraData, _cuPointDataEX, cuImageTempProj); |
| | } else { |
| | if (__jacobian_normalize) ComputeVXY(_cuVectorXK, _cuVectorSJ, dx); |
| | ProgramCU::UpdateCameraPoint(_num_camera, _cuCameraData, _cuPointData, dx, |
| | _cuCameraDataEX, _cuPointDataEX, |
| | __bundle_current_mode); |
| | return EvaluateProjection(_cuCameraDataEX, _cuPointDataEX, cuImageTempProj); |
| | } |
| | } |
| |
|
| | float SparseBundleCU::SaveUpdatedSystem(float residual_reduction, |
| | float dx_sqnorm, float damping) { |
| | float expected_reduction; |
| | if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { |
| | CuTexImage xk; |
| | xk.SetTexture(_cuVectorXK.data(), 8 * _num_camera); |
| | CuTexImage jte; |
| | jte.SetTexture(_cuVectorJtE.data(), 8 * _num_camera); |
| | float dxtg = (float)ComputeVectorDot(xk, jte, _cuBufferData); |
| | if (__lm_use_diagonal_damp) { |
| | CuTexImage jj; |
| | jj.SetTexture(_cuVectorJJ.data(), 8 * _num_camera); |
| | float dq = (float)ComputeVectorNormW(xk, jj, _cuBufferData); |
| | expected_reduction = damping * dq + dxtg; |
| | } else { |
| | expected_reduction = damping * dx_sqnorm + dxtg; |
| | } |
| | _cuCameraData.SwapData(_cuCameraDataEX); |
| | } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { |
| | CuTexImage xk; |
| | xk.SetTexture(_cuVectorXK.data() + 8 * _num_camera, 4 * _num_point); |
| | CuTexImage jte; |
| | jte.SetTexture(_cuVectorJtE.data() + 8 * _num_camera, 4 * _num_point); |
| | float dxtg = (float)ComputeVectorDot(xk, jte, _cuBufferData); |
| | if (__lm_use_diagonal_damp) { |
| | CuTexImage jj; |
| | jj.SetTexture(_cuVectorJJ.data() + 8 * _num_camera, 4 * _num_point); |
| | float dq = (float)ComputeVectorNormW(xk, jj, _cuBufferData); |
| | expected_reduction = damping * dq + dxtg; |
| | } else { |
| | expected_reduction = damping * dx_sqnorm + dxtg; |
| | } |
| | _cuPointData.SwapData(_cuPointDataEX); |
| | } else { |
| | float dxtg = |
| | (float)ComputeVectorDot(_cuVectorXK, _cuVectorJtE, _cuBufferData); |
| |
|
| | if (__accurate_gain_ratio) { |
| | ComputeJX(_cuVectorXK, _cuVectorJX); |
| | float njx = (float)ComputeVectorNorm(_cuVectorJX, _cuBufferData); |
| | expected_reduction = 2.0f * dxtg - njx; |
| | |
| | if (expected_reduction <= 0) |
| | expected_reduction = 0.001f * residual_reduction; |
| | } else if (__lm_use_diagonal_damp) { |
| | float dq = |
| | (float)ComputeVectorNormW(_cuVectorXK, _cuVectorJJ, _cuBufferData); |
| | expected_reduction = damping * dq + dxtg; |
| | } else { |
| | expected_reduction = damping * dx_sqnorm + dxtg; |
| | } |
| |
|
| | |
| | _cuCameraData.SwapData(_cuCameraDataEX); |
| | _cuPointData.SwapData(_cuPointDataEX); |
| |
|
| | |
| | |
| | |
| | } |
| | |
| | return float(residual_reduction / expected_reduction); |
| | } |
| |
|
| | void SparseBundleCU::AdjustBundleAdjsutmentMode() { |
| | if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { |
| | _cuJacobianCamera.InitTexture(0, 0); |
| | _cuJacobianCameraT.InitTexture(0, 0); |
| | } |
| | } |
| |
|
| | float SparseBundleCU::EvaluateDeltaNorm() { |
| | if (__bundle_current_mode == BUNDLE_ONLY_MOTION) { |
| | CuTexImage temp; |
| | temp.SetTexture(_cuVectorXK.data(), 8 * _num_camera); |
| | return ComputeVectorNorm(temp, _cuBufferData); |
| |
|
| | } else if (__bundle_current_mode == BUNDLE_ONLY_STRUCTURE) { |
| | CuTexImage temp; |
| | temp.SetTexture(_cuVectorXK.data() + 8 * _num_camera, 4 * _num_point); |
| | return ComputeVectorNorm(temp, _cuBufferData); |
| | } else { |
| | return (float)ComputeVectorNorm(_cuVectorXK, _cuBufferData); |
| | } |
| | } |
| |
|
| | void SparseBundleCU::NonlinearOptimizeLM() { |
| | |
| | TimerBA timer(this, TIMER_OPTIMIZATION); |
| |
|
| | |
| | float mse_convert_ratio = |
| | 1.0f / (_num_imgpt * __focal_scaling * __focal_scaling); |
| | float error_display_ratio = __verbose_sse ? _num_imgpt : 1.0f; |
| | const int edwidth = __verbose_sse ? 12 : 8; |
| | _projection_sse = |
| | EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); |
| | __initial_mse = __final_mse = _projection_sse * mse_convert_ratio; |
| |
|
| | |
| | if (__jacobian_normalize) PrepareJacobianNormalization(); |
| |
|
| | |
| | EvaluateJacobians(); |
| | ComputeJtE(_cuImageProj, _cuVectorJtE); |
| | |
| | if (__verbose_level) |
| | std::cout << "Initial " << (__verbose_sse ? "sumed" : "mean") |
| | << " squared error = " << __initial_mse * error_display_ratio |
| | << "\n----------------------------------------------\n"; |
| |
|
| | |
| | CuTexImage& cuImageTempProj = _cuVectorJX; |
| | |
| | CuTexImage& cuVectorDX = _cuVectorSJ.IsValid() ? _cuVectorZK : _cuVectorXK; |
| |
|
| | |
| | float damping_adjust = 2.0f, damping = __lm_initial_damp, g_norm, g_inf; |
| | SaveBundleRecord(0, _projection_sse * mse_convert_ratio, damping, g_norm, |
| | g_inf); |
| |
|
| | |
| | std::cout << std::left; |
| | for (int i = 0; i < __lm_max_iteration && !__abort_flag; |
| | __current_iteration = (++i)) { |
| | |
| | int num_cg_iteration = SolveNormalEquation(damping); |
| |
|
| | |
| | if (num_cg_iteration == 0) { |
| | if (__verbose_level) |
| | std::cout << "#" << std::setw(3) << i << " quit on numeric errors\n"; |
| | __pba_return_code = 'E'; |
| | break; |
| | } |
| |
|
| | |
| | if (__recent_cg_status == 'I') { |
| | std::cout << "#" << std::setw(3) << i << " 0 I e=" << std::setw(edwidth) |
| | << "------- " |
| | << " u=" << std::setprecision(3) << std::setw(9) << damping |
| | << '\n' << std::setprecision(6); |
| | |
| | damping = damping * damping_adjust; |
| | damping_adjust = 2.0f * damping_adjust; |
| | --i; |
| | continue; |
| | } |
| |
|
| | |
| | ++__num_lm_iteration; |
| |
|
| | |
| | float dx_sqnorm = EvaluateDeltaNorm(), dx_norm = sqrt(dx_sqnorm); |
| |
|
| | |
| | |
| | if (dx_norm <= __lm_delta_threshold) { |
| | |
| | if (__verbose_level > 1) |
| | std::cout << "#" << std::setw(3) << i << " " << std::setw(3) |
| | << num_cg_iteration << char(__recent_cg_status) |
| | << " quit on too small change (" << dx_norm << " < " |
| | << __lm_delta_threshold << ")\n"; |
| | __pba_return_code = 'S'; |
| | break; |
| | } |
| | |
| | |
| | float new_residual = UpdateCameraPoint(cuVectorDX, cuImageTempProj); |
| | float average_residual = new_residual * mse_convert_ratio; |
| | float residual_reduction = _projection_sse - new_residual; |
| |
|
| | |
| | if (isfinite(new_residual) && residual_reduction > 0) { |
| | |
| | float relative_reduction = 1.0f - (new_residual / _projection_sse); |
| |
|
| | |
| | __num_lm_success++; |
| | _projection_sse = new_residual; |
| | _cuImageProj.SwapData(cuImageTempProj); |
| |
|
| | |
| | float gain_ratio = |
| | SaveUpdatedSystem(residual_reduction, dx_sqnorm, damping); |
| |
|
| | |
| | SaveBundleRecord(i + 1, _projection_sse * mse_convert_ratio, damping, |
| | g_norm, g_inf); |
| |
|
| | |
| | if (__verbose_level > 1) |
| | std::cout << "#" << std::setw(3) << i << " " << std::setw(3) |
| | << num_cg_iteration << char(__recent_cg_status) |
| | << " e=" << std::setw(edwidth) |
| | << average_residual * error_display_ratio |
| | << " u=" << std::setprecision(3) << std::setw(9) << damping |
| | << " r=" << std::setw(6) |
| | << floor(gain_ratio * 1000.f) * 0.001f |
| | << " g=" << std::setw(g_norm > 0 ? 9 : 1) << g_norm << " " |
| | << std::setw(9) << relative_reduction << ' ' << std::setw(9) |
| | << dx_norm << " t=" << int(BundleTimerGetNow()) << "\n" |
| | << std::setprecision(6); |
| |
|
| | |
| | if (!IsTimeBudgetAvailable()) { |
| | if (__verbose_level > 1) |
| | std::cout << "#" << std::setw(3) << i << " used up time budget.\n"; |
| | __pba_return_code = 'T'; |
| | break; |
| | } else if (__lm_check_gradient && g_inf < __lm_gradient_threshold) { |
| | if (__verbose_level > 1) |
| | std::cout << "#" << std::setw(3) << i |
| | << " converged with small gradient\n"; |
| | __pba_return_code = 'G'; |
| | break; |
| | } else if (average_residual * error_display_ratio <= __lm_mse_threshold) { |
| | if (__verbose_level > 1) |
| | std::cout << "#" << std::setw(3) << i << " satisfies MSE threshold\n"; |
| | __pba_return_code = 'M'; |
| | break; |
| | } else { |
| | |
| | float temp = gain_ratio * 2.0f - 1.0f; |
| | float adaptive_adjust = 1.0f - temp * temp * temp; |
| | float auto_adjust = std::max(1.0f / 3.0f, adaptive_adjust); |
| |
|
| | |
| | damping = damping * auto_adjust; |
| | damping_adjust = 2.0f; |
| | if (damping < __lm_minimum_damp) |
| | damping = __lm_minimum_damp; |
| | else if (__lm_damping_auto_switch == 0 && damping > __lm_maximum_damp && |
| | __lm_use_diagonal_damp) |
| | damping = __lm_maximum_damp; |
| |
|
| | EvaluateJacobians(); |
| | ComputeJtE(_cuImageProj, _cuVectorJtE); |
| | } |
| | } else { |
| | if (__verbose_level > 1) |
| | std::cout << "#" << std::setw(3) << i << " " << std::setw(3) |
| | << num_cg_iteration << char(__recent_cg_status) |
| | << " e=" << std::setw(edwidth) << std::left |
| | << average_residual * error_display_ratio |
| | << " u=" << std::setprecision(3) << std::setw(9) << damping |
| | << " r=----- " << (__lm_check_gradient || __save_gradient_norm |
| | ? " g=---------" |
| | : " g=0") |
| | << " --------- " << std::setw(9) << dx_norm |
| | << " t=" << int(BundleTimerGetNow()) << "\n" |
| | << std::setprecision(6); |
| |
|
| | if (__lm_damping_auto_switch > 0 && __lm_use_diagonal_damp && |
| | damping > __lm_damping_auto_switch) { |
| | __lm_use_diagonal_damp = false; |
| | damping = __lm_damping_auto_switch; |
| | damping_adjust = 2.0f; |
| | if (__verbose_level > 1) |
| | std::cout << "NOTE: switch to damping with an identity matix\n"; |
| | } else { |
| | |
| | damping = damping * damping_adjust; |
| | damping_adjust = 2.0f * damping_adjust; |
| | } |
| | } |
| |
|
| | if (__verbose_level == 1) std::cout << '.'; |
| | } |
| |
|
| | __final_mse = float(_projection_sse * mse_convert_ratio); |
| | __final_mse_x = |
| | __use_radial_distortion |
| | ? EvaluateProjectionX(_cuCameraData, _cuPointData, _cuImageProj) * |
| | mse_convert_ratio |
| | : __final_mse; |
| | } |
| |
|
// Profiling helpers.
//
// PROFILE_(A, B): runs statement B `repeat` times, calling FinishWorkCUDA()
// after each run, between BundleTimerStart and BundleTimerSwitch on
// TIMER_PROFILE_STEP, then prints label A and the timer value divided by
// `repeat`. A variable named `repeat` must be in scope at the expansion site.
// NOTE: the macro expands to several statements and is not wrapped in a
// block, so it must not be used as the body of an unbraced if/for.
#define PROFILE_(A, B) \
  BundleTimerStart(TIMER_PROFILE_STEP); \
  for (int i = 0; i < repeat; ++i) { \
    B; \
    FinishWorkCUDA(); \
  } \
  BundleTimerSwitch(TIMER_PROFILE_STEP); \
  std::cout << std::setw(24) << A << ": " \
            << (BundleTimerGet(TIMER_PROFILE_STEP) / repeat) << "\n";

// PROFILE(A, B): A is a function name and B its parenthesised argument list;
// the stringised name is the label and `A B` is the call being timed.
#define PROFILE(A, B) PROFILE_(#A, A B)
// PROXILE(A, B): same as PROFILE_ with an explicit label A and statement B.
#define PROXILE(A, B) PROFILE_(A, B)
| |
|
| | void SparseBundleCU::RunProfileSteps() { |
| | const int repeat = __profile_pba; |
| | std::cout << "---------------------------------\n" |
| | "| Run profiling steps (" |
| | << repeat << ") |\n" |
| | "---------------------------------\n" |
| | << std::left; |
| | ; |
| |
|
| | |
| | PROXILE("Upload Measurements", |
| | _cuMeasurements.CopyFromHost( |
| | _imgpt_datax.size() > 0 ? &_imgpt_datax[0] : _imgpt_data)); |
| | PROXILE("Upload Point Data", _cuPointData.CopyToHost(_point_data)); |
| | std::cout << "---------------------------------\n"; |
| |
|
| | |
| | EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); |
| | PrepareJacobianNormalization(); |
| | EvaluateJacobians(); |
| | ComputeJtE(_cuImageProj, _cuVectorJtE); |
| | ComputeBlockPC(__lm_initial_damp, true); |
| | FinishWorkCUDA(); |
| |
|
| | do { |
| | if (SolveNormalEquationPCGX(__lm_initial_damp) == 10 && |
| | SolveNormalEquationPCGB(__lm_initial_damp) == 10) |
| | break; |
| | __lm_initial_damp *= 2.0f; |
| | } while (__lm_initial_damp < 1024.0f); |
| | std::cout << "damping set to " << __lm_initial_damp << " for profiling\n" |
| | << "---------------------------------\n"; |
| |
|
| | { |
| | int repeat = 10, cgmin = __cg_min_iteration, cgmax = __cg_max_iteration; |
| | __cg_max_iteration = __cg_min_iteration = 10; |
| | __num_cg_iteration = 0; |
| | PROFILE(SolveNormalEquationPCGX, (__lm_initial_damp)); |
| | if (__num_cg_iteration != 100) |
| | std::cout << __num_cg_iteration << " cg iterations in all\n"; |
| |
|
| | |
| | __num_cg_iteration = 0; |
| | PROFILE(SolveNormalEquationPCGB, (__lm_initial_damp)); |
| | if (__num_cg_iteration != 100) |
| | std::cout << __num_cg_iteration << " cg iterations in all\n"; |
| | std::cout << "---------------------------------\n"; |
| | |
| | __num_cg_iteration = 0; |
| | PROXILE("Single iteration LMX", RunTestIterationLM(true)); |
| | if (__num_cg_iteration != 100) |
| | std::cout << __num_cg_iteration << " cg iterations in all\n"; |
| | |
| | __num_cg_iteration = 0; |
| | PROXILE("Single iteration LMB", RunTestIterationLM(false)); |
| | if (__num_cg_iteration != 100) |
| | std::cout << __num_cg_iteration << " cg iterations in all\n"; |
| | std::cout << "---------------------------------\n"; |
| | __cg_max_iteration = cgmax; |
| | __cg_min_iteration = cgmin; |
| | } |
| | |
| | PROFILE(UpdateCameraPoint, (_cuVectorZK, _cuImageProj)); |
| | PROFILE(ComputeVectorNorm, (_cuVectorXK, _cuBufferData)); |
| | PROFILE(ComputeVectorDot, (_cuVectorXK, _cuVectorRK, _cuBufferData)); |
| | PROFILE(ComputeVectorNormW, (_cuVectorXK, _cuVectorRK, _cuBufferData)); |
| | PROFILE(ComputeSAXPY, (0.01f, _cuVectorXK, _cuVectorRK, _cuVectorZK)); |
| | PROFILE(ComputeSXYPZ, |
| | (0.01f, _cuVectorXK, _cuVectorPK, _cuVectorRK, _cuVectorZK)); |
| | std::cout << "---------------------------------\n"; |
| | PROFILE(ComputeVectorNorm, (_cuImageProj, _cuBufferData)); |
| | PROFILE(ComputeSAXPY, (0.000f, _cuImageProj, _cuVectorJX, _cuVectorJX)); |
| | std::cout << "---------------------------------\n"; |
| |
|
| | __multiply_jx_usenoj = false; |
| | |
| | PROFILE(EvaluateProjection, (_cuCameraData, _cuPointData, _cuImageProj)); |
| | PROFILE(ApplyBlockPC, (_cuVectorJtE, _cuVectorPK)); |
| | |
| | if (!__no_jacobian_store) { |
| | if (__jc_store_original) { |
| | PROFILE(ComputeJX, (_cuVectorJtE, _cuVectorJX)); |
| | PROFILE(EvaluateJacobians, (false)); |
| |
|
| | if (__jc_store_transpose) { |
| | PROFILE( |
| | ShuffleCameraJacobian, |
| | (_cuJacobianCamera, _cuCameraMeasurementList, _cuJacobianCameraT)); |
| | PROFILE(ComputeDiagonal, (_cuVectorJJ, _cuVectorPK)); |
| | PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); |
| | PROFILE(ComputeBlockPC, (0.001f, true)); |
| |
|
| | std::cout << "---------------------------------\n" |
| | "| Not storing original JC | \n" |
| | "---------------------------------\n"; |
| | __jc_store_original = false; |
| | PROFILE(EvaluateJacobians, ()); |
| | __jc_store_original = true; |
| | } |
| | |
| |
|
| | std::cout << "---------------------------------\n" |
| | "| Not storing transpose JC | \n" |
| | "---------------------------------\n"; |
| | __jc_store_transpose = false; |
| | PROFILE(ComputeDiagonal, (_cuVectorJJ, _cuVectorPK)); |
| | PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); |
| | PROFILE(ComputeBlockPC, (0.001f, true)); |
| |
|
| | |
| |
|
| | } else if (__jc_store_transpose) { |
| | PROFILE(ComputeDiagonal, (_cuVectorJJ, _cuVectorPK)); |
| | PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); |
| | PROFILE(ComputeBlockPC, (0.001f, true)); |
| | std::cout << "---------------------------------\n" |
| | "| Not storing original JC | \n" |
| | "---------------------------------\n"; |
| | PROFILE(EvaluateJacobians, ()); |
| | } |
| | } |
| |
|
| | if (!__no_jacobian_store) { |
| | std::cout << "---------------------------------\n" |
| | "| Not storing Camera Jacobians | \n" |
| | "---------------------------------\n"; |
| | __jc_store_transpose = false; |
| | __jc_store_original = false; |
| | _cuJacobianCamera.ReleaseData(); |
| | _cuJacobianCameraT.ReleaseData(); |
| | PROFILE(EvaluateJacobians, ()); |
| | PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); |
| | PROFILE(ComputeBlockPC, (0.001f, true)); |
| | } |
| |
|
| | |
| |
|
| | std::cout << "---------------------------------\n" |
| | "| Not storing any jacobians |\n" |
| | "---------------------------------\n"; |
| | __no_jacobian_store = true; |
| | _cuJacobianPoint.ReleaseData(); |
| | PROFILE(ComputeJX, (_cuVectorJtE, _cuVectorJX)); |
| | PROFILE(ComputeJtE, (_cuImageProj, _cuVectorJtE)); |
| | PROFILE(ComputeBlockPC, (0.001f, true)); |
| |
|
| | std::cout << "---------------------------------\n"; |
| | } |
| |
|
| | void SparseBundleCU::RunDebugSteps() { |
| | EvaluateProjection(_cuCameraData, _cuPointData, _cuImageProj); |
| | EvaluateJacobians(); |
| | ComputeJtE(_cuImageProj, _cuVectorJtE); |
| | |
| | DEBUG_FUNCN(_cuVectorJtE, ComputeJtE, (_cuImageProj, _cuVectorJtE), 100); |
| | DEBUG_FUNCN(_cuVectorJX, ComputeJX, (_cuVectorJtE, _cuVectorJX), 100); |
| | } |
| |
|
| | void SparseBundleCU::SaveNormalEquation(float lambda) { |
| | ofstream out1("../../matlab/cg_j.txt"); |
| | ofstream out2("../../matlab/cg_b.txt"); |
| | ofstream out3("../../matlab/cg_x.txt"); |
| |
|
| | out1 << std::setprecision(20); |
| | out2 << std::setprecision(20); |
| | out3 << std::setprecision(20); |
| |
|
| | int plen = GetParameterLength(); |
| | vector<float> jc(16 * _num_imgpt); |
| | vector<float> jp(8 * _num_imgpt); |
| | vector<float> ee(2 * _num_imgpt); |
| | vector<float> dx(plen); |
| |
|
| | _cuJacobianCamera.CopyToHost(&jc[0]); |
| | _cuJacobianPoint.CopyToHost(&jp[0]); |
| | _cuImageProj.CopyToHost(&ee[0]); |
| | _cuVectorXK.CopyToHost(&dx[0]); |
| |
|
| | for (int i = 0; i < _num_imgpt; ++i) { |
| | out2 << ee[i * 2] << ' ' << ee[i * 2 + 1] << ' '; |
| | int cidx = _camera_idx[i], pidx = _point_idx[i]; |
| | float *cp = &jc[i * 16], *pp = &jp[i * 8]; |
| | int cmin = cidx * 8, pmin = 8 * _num_camera + pidx * 4; |
| | for (int j = 0; j < 8; ++j) |
| | out1 << (i * 2 + 1) << ' ' << (cmin + j + 1) << ' ' << cp[j] << '\n'; |
| | for (int j = 0; j < 8; ++j) |
| | out1 << (i * 2 + 2) << ' ' << (cmin + j + 1) << ' ' << cp[j + 8] << '\n'; |
| | for (int j = 0; j < 4; ++j) |
| | out1 << (i * 2 + 1) << ' ' << (pmin + j + 1) << ' ' << pp[j] << '\n'; |
| | for (int j = 0; j < 4; ++j) |
| | out1 << (i * 2 + 2) << ' ' << (pmin + j + 1) << ' ' << pp[j + 4] << '\n'; |
| | } |
| |
|
| | for (size_t i = 0; i < dx.size(); ++i) out3 << dx[i] << ' '; |
| |
|
| | std::cout << "lambda = " << std::setprecision(20) << lambda << '\n'; |
| | } |
| |
|
| | } |
| |
|