| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #define _USE_MATH_DEFINES |
| |
|
| | #include "mvs/patch_match_cuda.h" |
| |
|
| | #include <algorithm> |
| | #include <cfloat> |
| | #include <cmath> |
| | #include <cstdint> |
| | #include <sstream> |
| |
|
| | #include "util/cuda.h" |
| | #include "util/cudacc.h" |
| | #include "util/logging.h" |
| |
|
| | |
| | |
| | #define THREADS_PER_BLOCK 32 |
| |
|
| | |
| | |
| | #ifndef DEG2RAD |
| | #define DEG2RAD(deg) deg * 0.0174532925199432 |
| | #endif |
| |
|
| | namespace colmap { |
| | namespace mvs { |
| |
|
| | texture<uint8_t, cudaTextureType2D, cudaReadModeNormalizedFloat> |
| | ref_image_texture; |
| | texture<uint8_t, cudaTextureType2DLayered, cudaReadModeNormalizedFloat> |
| | src_images_texture; |
| | texture<float, cudaTextureType2DLayered, cudaReadModeElementType> |
| | src_depth_maps_texture; |
| | texture<float, cudaTextureType2D, cudaReadModeElementType> poses_texture; |
| |
|
| | |
| | __constant__ float ref_K[4]; |
| | |
| | __constant__ float ref_inv_K[4]; |
| |
|
| | __device__ inline void Mat33DotVec3(const float mat[9], const float vec[3], |
| | float result[3]) { |
| | result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2]; |
| | result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2]; |
| | result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2]; |
| | } |
| |
|
| | __device__ inline void Mat33DotVec3Homogeneous(const float mat[9], |
| | const float vec[2], |
| | float result[2]) { |
| | const float inv_z = 1.0f / (mat[6] * vec[0] + mat[7] * vec[1] + mat[8]); |
| | result[0] = inv_z * (mat[0] * vec[0] + mat[1] * vec[1] + mat[2]); |
| | result[1] = inv_z * (mat[3] * vec[0] + mat[4] * vec[1] + mat[5]); |
| | } |
| |
|
| | __device__ inline float DotProduct3(const float vec1[3], const float vec2[3]) { |
| | return vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2]; |
| | } |
| |
|
| | __device__ inline float GenerateRandomDepth(const float depth_min, |
| | const float depth_max, |
| | curandState* rand_state) { |
| | return curand_uniform(rand_state) * (depth_max - depth_min) + depth_min; |
| | } |
| |
|
| | __device__ inline void GenerateRandomNormal(const int row, const int col, |
| | curandState* rand_state, |
| | float normal[3]) { |
| | |
| | |
| | float v1 = 0.0f; |
| | float v2 = 0.0f; |
| | float s = 2.0f; |
| | while (s >= 1.0f) { |
| | v1 = 2.0f * curand_uniform(rand_state) - 1.0f; |
| | v2 = 2.0f * curand_uniform(rand_state) - 1.0f; |
| | s = v1 * v1 + v2 * v2; |
| | } |
| |
|
| | const float s_norm = sqrt(1.0f - s); |
| | normal[0] = 2.0f * v1 * s_norm; |
| | normal[1] = 2.0f * v2 * s_norm; |
| | normal[2] = 1.0f - 2.0f * s; |
| |
|
| | |
| | const float view_ray[3] = {ref_inv_K[0] * col + ref_inv_K[1], |
| | ref_inv_K[2] * row + ref_inv_K[3], 1.0f}; |
| | if (DotProduct3(normal, view_ray) > 0) { |
| | normal[0] = -normal[0]; |
| | normal[1] = -normal[1]; |
| | normal[2] = -normal[2]; |
| | } |
| | } |
| |
|
| | __device__ inline float PerturbDepth(const float perturbation, |
| | const float depth, |
| | curandState* rand_state) { |
| | const float depth_min = (1.0f - perturbation) * depth; |
| | const float depth_max = (1.0f + perturbation) * depth; |
| | return GenerateRandomDepth(depth_min, depth_max, rand_state); |
| | } |
| |
|
| | __device__ inline void PerturbNormal(const int row, const int col, |
| | const float perturbation, |
| | const float normal[3], |
| | curandState* rand_state, |
| | float perturbed_normal[3], |
| | const int num_trials = 0) { |
| | |
| | const float a1 = (curand_uniform(rand_state) - 0.5f) * perturbation; |
| | const float a2 = (curand_uniform(rand_state) - 0.5f) * perturbation; |
| | const float a3 = (curand_uniform(rand_state) - 0.5f) * perturbation; |
| |
|
| | const float sin_a1 = sin(a1); |
| | const float sin_a2 = sin(a2); |
| | const float sin_a3 = sin(a3); |
| | const float cos_a1 = cos(a1); |
| | const float cos_a2 = cos(a2); |
| | const float cos_a3 = cos(a3); |
| |
|
| | |
| | float R[9]; |
| | R[0] = cos_a2 * cos_a3; |
| | R[1] = -cos_a2 * sin_a3; |
| | R[2] = sin_a2; |
| | R[3] = cos_a1 * sin_a3 + cos_a3 * sin_a1 * sin_a2; |
| | R[4] = cos_a1 * cos_a3 - sin_a1 * sin_a2 * sin_a3; |
| | R[5] = -cos_a2 * sin_a1; |
| | R[6] = sin_a1 * sin_a3 - cos_a1 * cos_a3 * sin_a2; |
| | R[7] = cos_a3 * sin_a1 + cos_a1 * sin_a2 * sin_a3; |
| | R[8] = cos_a1 * cos_a2; |
| |
|
| | |
| | Mat33DotVec3(R, normal, perturbed_normal); |
| |
|
| | |
| | |
| | const float view_ray[3] = {ref_inv_K[0] * col + ref_inv_K[1], |
| | ref_inv_K[2] * row + ref_inv_K[3], 1.0f}; |
| | if (DotProduct3(perturbed_normal, view_ray) >= 0.0f) { |
| | const int kMaxNumTrials = 3; |
| | if (num_trials < kMaxNumTrials) { |
| | PerturbNormal(row, col, 0.5f * perturbation, normal, rand_state, |
| | perturbed_normal, num_trials + 1); |
| | return; |
| | } else { |
| | perturbed_normal[0] = normal[0]; |
| | perturbed_normal[1] = normal[1]; |
| | perturbed_normal[2] = normal[2]; |
| | return; |
| | } |
| | } |
| |
|
| | |
| | const float inv_norm = rsqrt(DotProduct3(perturbed_normal, perturbed_normal)); |
| | perturbed_normal[0] *= inv_norm; |
| | perturbed_normal[1] *= inv_norm; |
| | perturbed_normal[2] *= inv_norm; |
| | } |
| |
|
| | __device__ inline void ComputePointAtDepth(const float row, const float col, |
| | const float depth, float point[3]) { |
| | point[0] = depth * (ref_inv_K[0] * col + ref_inv_K[1]); |
| | point[1] = depth * (ref_inv_K[2] * row + ref_inv_K[3]); |
| | point[2] = depth; |
| | } |
| |
|
| | |
| | |
| | |
| | __device__ inline float PropagateDepth(const float depth1, |
| | const float normal1[3], const float row1, |
| | const float row2) { |
| | |
| | const float x1 = depth1 * (ref_inv_K[2] * row1 + ref_inv_K[3]); |
| | const float y1 = depth1; |
| | |
| | const float x2 = x1 + normal1[2]; |
| | const float y2 = y1 - normal1[1]; |
| |
|
| | |
| | |
| | |
| | |
| | const float x4 = ref_inv_K[2] * row2 + ref_inv_K[3]; |
| | |
| |
|
| | |
| | const float denom = x2 - x1 + x4 * (y1 - y2); |
| | constexpr float kEps = 1e-5f; |
| | if (abs(denom) < kEps) { |
| | return depth1; |
| | } |
| | const float nom = y1 * x2 - x1 * y2; |
| | return nom / denom; |
| | } |
| |
|
| | |
| | |
| | |
| | __device__ inline void ComputeViewingAngles(const float point[3], |
| | const float normal[3], |
| | const int image_idx, |
| | float* cos_triangulation_angle, |
| | float* cos_incident_angle) { |
| | *cos_triangulation_angle = 0.0f; |
| | *cos_incident_angle = 0.0f; |
| |
|
| | |
| | float C[3]; |
| | for (int i = 0; i < 3; ++i) { |
| | C[i] = tex2D(poses_texture, i + 16, image_idx); |
| | } |
| |
|
| | |
| | const float SX[3] = {C[0] - point[0], C[1] - point[1], C[2] - point[2]}; |
| |
|
| | |
| | const float RX_inv_norm = rsqrt(DotProduct3(point, point)); |
| |
|
| | |
| | const float SX_inv_norm = rsqrt(DotProduct3(SX, SX)); |
| |
|
| | *cos_incident_angle = DotProduct3(SX, normal) * SX_inv_norm; |
| | *cos_triangulation_angle = DotProduct3(SX, point) * RX_inv_norm * SX_inv_norm; |
| | } |
| |
|
| | __device__ inline void ComposeHomography(const int image_idx, const int row, |
| | const int col, const float depth, |
| | const float normal[3], float H[9]) { |
| | |
| | float K[4]; |
| | for (int i = 0; i < 4; ++i) { |
| | K[i] = tex2D(poses_texture, i, image_idx); |
| | } |
| |
|
| | |
| | float R[9]; |
| | for (int i = 0; i < 9; ++i) { |
| | R[i] = tex2D(poses_texture, i + 4, image_idx); |
| | } |
| |
|
| | |
| | float T[3]; |
| | for (int i = 0; i < 3; ++i) { |
| | T[i] = tex2D(poses_texture, i + 13, image_idx); |
| | } |
| |
|
| | |
| | const float dist = |
| | depth * (normal[0] * (ref_inv_K[0] * col + ref_inv_K[1]) + |
| | normal[1] * (ref_inv_K[2] * row + ref_inv_K[3]) + normal[2]); |
| | const float inv_dist = 1.0f / dist; |
| |
|
| | const float inv_dist_N0 = inv_dist * normal[0]; |
| | const float inv_dist_N1 = inv_dist * normal[1]; |
| | const float inv_dist_N2 = inv_dist * normal[2]; |
| |
|
| | |
| | H[0] = ref_inv_K[0] * (K[0] * (R[0] + inv_dist_N0 * T[0]) + |
| | K[1] * (R[6] + inv_dist_N0 * T[2])); |
| | H[1] = ref_inv_K[2] * (K[0] * (R[1] + inv_dist_N1 * T[0]) + |
| | K[1] * (R[7] + inv_dist_N1 * T[2])); |
| | H[2] = K[0] * (R[2] + inv_dist_N2 * T[0]) + |
| | K[1] * (R[8] + inv_dist_N2 * T[2]) + |
| | ref_inv_K[1] * (K[0] * (R[0] + inv_dist_N0 * T[0]) + |
| | K[1] * (R[6] + inv_dist_N0 * T[2])) + |
| | ref_inv_K[3] * (K[0] * (R[1] + inv_dist_N1 * T[0]) + |
| | K[1] * (R[7] + inv_dist_N1 * T[2])); |
| | H[3] = ref_inv_K[0] * (K[2] * (R[3] + inv_dist_N0 * T[1]) + |
| | K[3] * (R[6] + inv_dist_N0 * T[2])); |
| | H[4] = ref_inv_K[2] * (K[2] * (R[4] + inv_dist_N1 * T[1]) + |
| | K[3] * (R[7] + inv_dist_N1 * T[2])); |
| | H[5] = K[2] * (R[5] + inv_dist_N2 * T[1]) + |
| | K[3] * (R[8] + inv_dist_N2 * T[2]) + |
| | ref_inv_K[1] * (K[2] * (R[3] + inv_dist_N0 * T[1]) + |
| | K[3] * (R[6] + inv_dist_N0 * T[2])) + |
| | ref_inv_K[3] * (K[2] * (R[4] + inv_dist_N1 * T[1]) + |
| | K[3] * (R[7] + inv_dist_N1 * T[2])); |
| | H[6] = ref_inv_K[0] * (R[6] + inv_dist_N0 * T[2]); |
| | H[7] = ref_inv_K[2] * (R[7] + inv_dist_N1 * T[2]); |
| | H[8] = R[8] + ref_inv_K[1] * (R[6] + inv_dist_N0 * T[2]) + |
| | ref_inv_K[3] * (R[7] + inv_dist_N1 * T[2]) + inv_dist_N2 * T[2]; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | template <int kWindowSize> |
| | struct LocalRefImage { |
| | const static int kWindowRadius = kWindowSize / 2; |
| | const static int kThreadBlockRadius = 1; |
| | const static int kThreadBlockSize = 2 * kThreadBlockRadius + 1; |
| | const static int kNumRows = kWindowSize; |
| | const static int kNumColumns = kThreadBlockSize * THREADS_PER_BLOCK; |
| | const static int kDataSize = kNumRows * kNumColumns; |
| |
|
| | float* data = nullptr; |
| |
|
| | __device__ inline void Read(const int row) { |
| | |
| | |
| | |
| | |
| | |
| |
|
| | const int thread_id = threadIdx.x; |
| | const int thread_block_first_id = blockDim.x * blockIdx.x; |
| |
|
| | const int local_col_start = thread_id; |
| | const int global_col_start = thread_block_first_id - |
| | kThreadBlockRadius * THREADS_PER_BLOCK + |
| | thread_id; |
| |
|
| | if (row == 0) { |
| | int global_row = row - kWindowRadius; |
| | for (int local_row = 0; local_row < kNumRows; ++local_row, ++global_row) { |
| | int local_col = local_col_start; |
| | int global_col = global_col_start; |
| | #pragma unroll |
| | for (int block = 0; block < kThreadBlockSize; ++block) { |
| | data[local_row * kNumColumns + local_col] = |
| | tex2D(ref_image_texture, global_col, global_row); |
| | local_col += THREADS_PER_BLOCK; |
| | global_col += THREADS_PER_BLOCK; |
| | } |
| | } |
| | } else { |
| | |
| | for (int local_row = 1; local_row < kNumRows; ++local_row) { |
| | int local_col = local_col_start; |
| | #pragma unroll |
| | for (int block = 0; block < kThreadBlockSize; ++block) { |
| | data[(local_row - 1) * kNumColumns + local_col] = |
| | data[local_row * kNumColumns + local_col]; |
| | local_col += THREADS_PER_BLOCK; |
| | } |
| | } |
| |
|
| | |
| | const int local_row = kNumRows - 1; |
| | const int global_row = row + kWindowRadius; |
| | int local_col = local_col_start; |
| | int global_col = global_col_start; |
| | #pragma unroll |
| | for (int block = 0; block < kThreadBlockSize; ++block) { |
| | data[local_row * kNumColumns + local_col] = |
| | tex2D(ref_image_texture, global_col, global_row); |
| | local_col += THREADS_PER_BLOCK; |
| | global_col += THREADS_PER_BLOCK; |
| | } |
| | } |
| | } |
| | }; |
| |
|
| | |
| | |
| | template <int kWindowSize, int kWindowStep> |
| | struct PhotoConsistencyCostComputer { |
| | const static int kWindowRadius = kWindowSize / 2; |
| |
|
| | __device__ PhotoConsistencyCostComputer(const float sigma_spatial, |
| | const float sigma_color) |
| | : bilateral_weight_computer_(sigma_spatial, sigma_color) {} |
| |
|
| | |
| | const float kMaxCost = 2.0f; |
| |
|
| | |
| | typedef LocalRefImage<kWindowSize> LocalRefImageType; |
| | LocalRefImageType local_ref_image; |
| |
|
| | |
| | float local_ref_sum = 0.0f; |
| | float local_ref_squared_sum = 0.0f; |
| |
|
| | |
| | int src_image_idx = -1; |
| |
|
| | |
| | int row = -1; |
| | int col = -1; |
| |
|
| | |
| | float depth = 0.0f; |
| | const float* normal = nullptr; |
| |
|
| | __device__ inline void Read(const int row) { |
| | local_ref_image.Read(row); |
| | __syncthreads(); |
| | } |
| |
|
| | __device__ inline float Compute() const { |
| | float tform[9]; |
| | ComposeHomography(src_image_idx, row, col, depth, normal, tform); |
| |
|
| | float tform_step[8]; |
| | for (int i = 0; i < 8; ++i) { |
| | tform_step[i] = kWindowStep * tform[i]; |
| | } |
| |
|
| | const int thread_id = threadIdx.x; |
| | const int row_start = row - kWindowRadius; |
| | const int col_start = col - kWindowRadius; |
| |
|
| | float col_src = tform[0] * col_start + tform[1] * row_start + tform[2]; |
| | float row_src = tform[3] * col_start + tform[4] * row_start + tform[5]; |
| | float z = tform[6] * col_start + tform[7] * row_start + tform[8]; |
| | float base_col_src = col_src; |
| | float base_row_src = row_src; |
| | float base_z = z; |
| |
|
| | int ref_image_idx = THREADS_PER_BLOCK - kWindowRadius + thread_id; |
| | int ref_image_base_idx = ref_image_idx; |
| |
|
| | const float ref_center_color = |
| | local_ref_image |
| | .data[ref_image_idx + kWindowRadius * 3 * THREADS_PER_BLOCK + |
| | kWindowRadius]; |
| | const float ref_color_sum = local_ref_sum; |
| | const float ref_color_squared_sum = local_ref_squared_sum; |
| | float src_color_sum = 0.0f; |
| | float src_color_squared_sum = 0.0f; |
| | float src_ref_color_sum = 0.0f; |
| | float bilateral_weight_sum = 0.0f; |
| |
|
| | for (int row = -kWindowRadius; row <= kWindowRadius; row += kWindowStep) { |
| | for (int col = -kWindowRadius; col <= kWindowRadius; col += kWindowStep) { |
| | const float inv_z = 1.0f / z; |
| | const float norm_col_src = inv_z * col_src + 0.5f; |
| | const float norm_row_src = inv_z * row_src + 0.5f; |
| | const float ref_color = local_ref_image.data[ref_image_idx]; |
| | const float src_color = tex2DLayered(src_images_texture, norm_col_src, |
| | norm_row_src, src_image_idx); |
| |
|
| | const float bilateral_weight = bilateral_weight_computer_.Compute( |
| | row, col, ref_center_color, ref_color); |
| |
|
| | const float bilateral_weight_src = bilateral_weight * src_color; |
| |
|
| | src_color_sum += bilateral_weight_src; |
| | src_color_squared_sum += bilateral_weight_src * src_color; |
| | src_ref_color_sum += bilateral_weight_src * ref_color; |
| | bilateral_weight_sum += bilateral_weight; |
| |
|
| | ref_image_idx += kWindowStep; |
| |
|
| | |
| | |
| | |
| | |
| | col_src += tform_step[0]; |
| | row_src += tform_step[3]; |
| | z += tform_step[6]; |
| | } |
| |
|
| | ref_image_base_idx += kWindowStep * 3 * THREADS_PER_BLOCK; |
| | ref_image_idx = ref_image_base_idx; |
| |
|
| | base_col_src += tform_step[1]; |
| | base_row_src += tform_step[4]; |
| | base_z += tform_step[7]; |
| |
|
| | col_src = base_col_src; |
| | row_src = base_row_src; |
| | z = base_z; |
| | } |
| |
|
| | const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum; |
| | src_color_sum *= inv_bilateral_weight_sum; |
| | src_color_squared_sum *= inv_bilateral_weight_sum; |
| | src_ref_color_sum *= inv_bilateral_weight_sum; |
| |
|
| | const float ref_color_var = |
| | ref_color_squared_sum - ref_color_sum * ref_color_sum; |
| | const float src_color_var = |
| | src_color_squared_sum - src_color_sum * src_color_sum; |
| |
|
| | |
| | |
| | constexpr float kMinVar = 1e-5f; |
| | if (ref_color_var < kMinVar || src_color_var < kMinVar) { |
| | return kMaxCost; |
| | } else { |
| | const float src_ref_color_covar = |
| | src_ref_color_sum - ref_color_sum * src_color_sum; |
| | const float src_ref_color_var = sqrt(ref_color_var * src_color_var); |
| | return max(0.0f, |
| | min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var)); |
| | } |
| | } |
| |
|
| | private: |
| | const BilateralWeightComputer bilateral_weight_computer_; |
| | }; |
| |
|
| | __device__ inline float ComputeGeomConsistencyCost(const float row, |
| | const float col, |
| | const float depth, |
| | const int image_idx, |
| | const float max_cost) { |
| | |
| | float P[12]; |
| | for (int i = 0; i < 12; ++i) { |
| | P[i] = tex2D(poses_texture, i + 19, image_idx); |
| | } |
| | float inv_P[12]; |
| | for (int i = 0; i < 12; ++i) { |
| | inv_P[i] = tex2D(poses_texture, i + 31, image_idx); |
| | } |
| |
|
| | |
| | float forward_point[3]; |
| | ComputePointAtDepth(row, col, depth, forward_point); |
| |
|
| | |
| | const float inv_forward_z = |
| | 1.0f / (P[8] * forward_point[0] + P[9] * forward_point[1] + |
| | P[10] * forward_point[2] + P[11]); |
| | float src_col = |
| | inv_forward_z * (P[0] * forward_point[0] + P[1] * forward_point[1] + |
| | P[2] * forward_point[2] + P[3]); |
| | float src_row = |
| | inv_forward_z * (P[4] * forward_point[0] + P[5] * forward_point[1] + |
| | P[6] * forward_point[2] + P[7]); |
| |
|
| | |
| | const float src_depth = tex2DLayered(src_depth_maps_texture, src_col + 0.5f, |
| | src_row + 0.5f, image_idx); |
| |
|
| | |
| | if (src_depth == 0.0f) { |
| | return max_cost; |
| | } |
| |
|
| | |
| | src_col *= src_depth; |
| | src_row *= src_depth; |
| | const float backward_point_x = |
| | inv_P[0] * src_col + inv_P[1] * src_row + inv_P[2] * src_depth + inv_P[3]; |
| | const float backward_point_y = |
| | inv_P[4] * src_col + inv_P[5] * src_row + inv_P[6] * src_depth + inv_P[7]; |
| | const float backward_point_z = inv_P[8] * src_col + inv_P[9] * src_row + |
| | inv_P[10] * src_depth + inv_P[11]; |
| | const float inv_backward_point_z = 1.0f / backward_point_z; |
| |
|
| | |
| | const float backward_col = |
| | inv_backward_point_z * |
| | (ref_K[0] * backward_point_x + ref_K[1] * backward_point_z); |
| | const float backward_row = |
| | inv_backward_point_z * |
| | (ref_K[2] * backward_point_y + ref_K[3] * backward_point_z); |
| |
|
| | |
| | |
| | const float diff_col = col - backward_col; |
| | const float diff_row = row - backward_row; |
| | return min(max_cost, sqrt(diff_col * diff_col + diff_row * diff_row)); |
| | } |
| |
|
| | |
| | template <int kNumCosts> |
| | __device__ inline int FindMinCost(const float costs[kNumCosts]) { |
| | float min_cost = costs[0]; |
| | int min_cost_idx = 0; |
| | for (int idx = 1; idx < kNumCosts; ++idx) { |
| | if (costs[idx] <= min_cost) { |
| | min_cost = costs[idx]; |
| | min_cost_idx = idx; |
| | } |
| | } |
| | return min_cost_idx; |
| | } |
| |
|
| | __device__ inline void TransformPDFToCDF(float* probs, const int num_probs) { |
| | float prob_sum = 0.0f; |
| | for (int i = 0; i < num_probs; ++i) { |
| | prob_sum += probs[i]; |
| | } |
| | const float inv_prob_sum = 1.0f / prob_sum; |
| |
|
| | float cum_prob = 0.0f; |
| | for (int i = 0; i < num_probs; ++i) { |
| | const float prob = probs[i] * inv_prob_sum; |
| | cum_prob += prob; |
| | probs[i] = cum_prob; |
| | } |
| | } |
| |
|
| | class LikelihoodComputer { |
| | public: |
| | __device__ LikelihoodComputer(const float ncc_sigma, |
| | const float min_triangulation_angle, |
| | const float incident_angle_sigma) |
| | : cos_min_triangulation_angle_(cos(min_triangulation_angle)), |
| | inv_incident_angle_sigma_square_( |
| | -0.5f / (incident_angle_sigma * incident_angle_sigma)), |
| | inv_ncc_sigma_square_(-0.5f / (ncc_sigma * ncc_sigma)), |
| | ncc_norm_factor_(ComputeNCCCostNormFactor(ncc_sigma)) {} |
| |
|
| | |
| | |
| | __device__ float ComputeForwardMessage(const float cost, |
| | const float prev) const { |
| | return ComputeMessage<true>(cost, prev); |
| | } |
| |
|
| | |
| | |
| | __device__ float ComputeBackwardMessage(const float cost, |
| | const float prev) const { |
| | return ComputeMessage<false>(cost, prev); |
| | } |
| |
|
| | |
| | __device__ inline float ComputeSelProb(const float alpha, const float beta, |
| | const float prev, |
| | const float prev_weight) const { |
| | const float zn0 = (1.0f - alpha) * (1.0f - beta); |
| | const float zn1 = alpha * beta; |
| | const float curr = zn1 / (zn0 + zn1); |
| | return prev_weight * prev + (1.0f - prev_weight) * curr; |
| | } |
| |
|
| | |
| | __device__ inline float ComputeNCCProb(const float cost) const { |
| | return exp(cost * cost * inv_ncc_sigma_square_) * ncc_norm_factor_; |
| | } |
| |
|
| | |
| | __device__ inline float ComputeTriProb( |
| | const float cos_triangulation_angle) const { |
| | const float abs_cos_triangulation_angle = abs(cos_triangulation_angle); |
| | if (abs_cos_triangulation_angle > cos_min_triangulation_angle_) { |
| | const float scaled = 1.0f - (1.0f - abs_cos_triangulation_angle) / |
| | (1.0f - cos_min_triangulation_angle_); |
| | const float likelihood = 1.0f - scaled * scaled; |
| | return min(1.0f, max(0.0f, likelihood)); |
| | } else { |
| | return 1.0f; |
| | } |
| | } |
| |
|
| | |
| | __device__ inline float ComputeIncProb(const float cos_incident_angle) const { |
| | const float x = 1.0f - max(0.0f, cos_incident_angle); |
| | return exp(x * x * inv_incident_angle_sigma_square_); |
| | } |
| |
|
| | |
| | template <int kWindowSize> |
| | __device__ inline float ComputeResolutionProb(const float H[9], |
| | const float row, |
| | const float col) const { |
| | const int kWindowRadius = kWindowSize / 2; |
| |
|
| | |
| | float src1[2]; |
| | const float ref1[2] = {col - kWindowRadius, row - kWindowRadius}; |
| | Mat33DotVec3Homogeneous(H, ref1, src1); |
| | float src2[2]; |
| | const float ref2[2] = {col - kWindowRadius, row + kWindowRadius}; |
| | Mat33DotVec3Homogeneous(H, ref2, src2); |
| | float src3[2]; |
| | const float ref3[2] = {col + kWindowRadius, row + kWindowRadius}; |
| | Mat33DotVec3Homogeneous(H, ref3, src3); |
| | float src4[2]; |
| | const float ref4[2] = {col + kWindowRadius, row - kWindowRadius}; |
| | Mat33DotVec3Homogeneous(H, ref4, src4); |
| |
|
| | |
| | const float ref_area = kWindowSize * kWindowSize; |
| | const float src_area = |
| | abs(0.5f * (src1[0] * src2[1] - src2[0] * src1[1] - src1[0] * src4[1] + |
| | src2[0] * src3[1] - src3[0] * src2[1] + src4[0] * src1[1] + |
| | src3[0] * src4[1] - src4[0] * src3[1])); |
| |
|
| | if (ref_area > src_area) { |
| | return src_area / ref_area; |
| | } else { |
| | return ref_area / src_area; |
| | } |
| | } |
| |
|
| | private: |
| | |
| | |
| | __device__ static inline float ComputeNCCCostNormFactor( |
| | const float ncc_sigma) { |
| | |
| | |
| | return 2.0f / (sqrt(2.0f * M_PI) * ncc_sigma * |
| | erff(2.0f / (ncc_sigma * 1.414213562f))); |
| | } |
| |
|
| | |
| | template <bool kForward> |
| | __device__ inline float ComputeMessage(const float cost, |
| | const float prev) const { |
| | constexpr float kUniformProb = 0.5f; |
| | constexpr float kNoChangeProb = 0.99999f; |
| | const float kChangeProb = 1.0f - kNoChangeProb; |
| | const float emission = ComputeNCCProb(cost); |
| |
|
| | float zn0; |
| | float zn1; |
| | if (kForward) { |
| | zn0 = (prev * kChangeProb + (1.0f - prev) * kNoChangeProb) * kUniformProb; |
| | zn1 = (prev * kNoChangeProb + (1.0f - prev) * kChangeProb) * emission; |
| | } else { |
| | zn0 = prev * emission * kChangeProb + |
| | (1.0f - prev) * kUniformProb * kNoChangeProb; |
| | zn1 = prev * emission * kNoChangeProb + |
| | (1.0f - prev) * kUniformProb * kChangeProb; |
| | } |
| |
|
| | return zn1 / (zn0 + zn1); |
| | } |
| |
|
| | float cos_min_triangulation_angle_; |
| | float inv_incident_angle_sigma_square_; |
| | float inv_ncc_sigma_square_; |
| | float ncc_norm_factor_; |
| | }; |
| |
|
| | |
| | __global__ void InitNormalMap(GpuMat<float> normal_map, |
| | GpuMat<curandState> rand_state_map) { |
| | const int row = blockDim.y * blockIdx.y + threadIdx.y; |
| | const int col = blockDim.x * blockIdx.x + threadIdx.x; |
| | if (col < normal_map.GetWidth() && row < normal_map.GetHeight()) { |
| | curandState rand_state = rand_state_map.Get(row, col); |
| | float normal[3]; |
| | GenerateRandomNormal(row, col, &rand_state, normal); |
| | normal_map.SetSlice(row, col, normal); |
| | rand_state_map.Set(row, col, rand_state); |
| | } |
| | } |
| |
|
| | |
| | __global__ void RotateNormalMap(GpuMat<float> normal_map) { |
| | const int row = blockDim.y * blockIdx.y + threadIdx.y; |
| | const int col = blockDim.x * blockIdx.x + threadIdx.x; |
| | if (col < normal_map.GetWidth() && row < normal_map.GetHeight()) { |
| | float normal[3]; |
| | normal_map.GetSlice(row, col, normal); |
| | float rotated_normal[3]; |
| | rotated_normal[0] = normal[1]; |
| | rotated_normal[1] = -normal[0]; |
| | rotated_normal[2] = normal[2]; |
| | normal_map.SetSlice(row, col, rotated_normal); |
| | } |
| | } |
| |
|
| | template <int kWindowSize, int kWindowStep> |
| | __global__ void ComputeInitialCost(GpuMat<float> cost_map, |
| | const GpuMat<float> depth_map, |
| | const GpuMat<float> normal_map, |
| | const GpuMat<float> ref_sum_image, |
| | const GpuMat<float> ref_squared_sum_image, |
| | const float sigma_spatial, |
| | const float sigma_color) { |
| | const int col = blockDim.x * blockIdx.x + threadIdx.x; |
| |
|
| | typedef PhotoConsistencyCostComputer<kWindowSize, kWindowStep> |
| | PhotoConsistencyCostComputerType; |
| | PhotoConsistencyCostComputerType pcc_computer(sigma_spatial, sigma_color); |
| | pcc_computer.col = col; |
| |
|
| | __shared__ float local_ref_image_data |
| | [PhotoConsistencyCostComputerType::LocalRefImageType::kDataSize]; |
| | pcc_computer.local_ref_image.data = &local_ref_image_data[0]; |
| |
|
| | float normal[3] = {0}; |
| | pcc_computer.normal = normal; |
| |
|
| | for (int row = 0; row < cost_map.GetHeight(); ++row) { |
| | |
| | |
| | pcc_computer.Read(row); |
| |
|
| | if (col < cost_map.GetWidth()) { |
| | pcc_computer.depth = depth_map.Get(row, col); |
| | normal_map.GetSlice(row, col, normal); |
| |
|
| | pcc_computer.row = row; |
| | pcc_computer.local_ref_sum = ref_sum_image.Get(row, col); |
| | pcc_computer.local_ref_squared_sum = ref_squared_sum_image.Get(row, col); |
| |
|
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | pcc_computer.src_image_idx = image_idx; |
| | cost_map.Set(row, col, image_idx, pcc_computer.Compute()); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | struct SweepOptions { |
| | float perturbation = 1.0f; |
| | float depth_min = 0.0f; |
| | float depth_max = 1.0f; |
| | int num_samples = 15; |
| | float sigma_spatial = 3.0f; |
| | float sigma_color = 0.3f; |
| | float ncc_sigma = 0.6f; |
| | float min_triangulation_angle = 0.5f; |
| | float incident_angle_sigma = 0.9f; |
| | float prev_sel_prob_weight = 0.0f; |
| | float geom_consistency_regularizer = 0.1f; |
| | float geom_consistency_max_cost = 5.0f; |
| | float filter_min_ncc = 0.1f; |
| | float filter_min_triangulation_angle = 3.0f; |
| | int filter_min_num_consistent = 2; |
| | float filter_geom_consistency_max_cost = 1.0f; |
| | }; |
| |
|
| | template <int kWindowSize, int kWindowStep, bool kGeomConsistencyTerm = false, |
| | bool kFilterPhotoConsistency = false, |
| | bool kFilterGeomConsistency = false> |
| | __global__ void SweepFromTopToBottom( |
| | GpuMat<float> global_workspace, GpuMat<curandState> rand_state_map, |
| | GpuMat<float> cost_map, GpuMat<float> depth_map, GpuMat<float> normal_map, |
| | GpuMat<uint8_t> consistency_mask, GpuMat<float> sel_prob_map, |
| | const GpuMat<float> prev_sel_prob_map, const GpuMat<float> ref_sum_image, |
| | const GpuMat<float> ref_squared_sum_image, const SweepOptions options) { |
| | const int col = blockDim.x * blockIdx.x + threadIdx.x; |
| |
|
| | |
| | constexpr float kUniformProb = 0.5f; |
| |
|
| | LikelihoodComputer likelihood_computer(options.ncc_sigma, |
| | options.min_triangulation_angle, |
| | options.incident_angle_sigma); |
| |
|
| | float* forward_message = |
| | &global_workspace.GetPtr()[col * global_workspace.GetHeight()]; |
| | float* sampling_probs = |
| | &global_workspace.GetPtr()[global_workspace.GetWidth() * |
| | global_workspace.GetHeight() + |
| | col * global_workspace.GetHeight()]; |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | if (col < cost_map.GetWidth()) { |
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | |
| | float beta = kUniformProb; |
| | for (int row = cost_map.GetHeight() - 1; row >= 0; --row) { |
| | const float cost = cost_map.Get(row, col, image_idx); |
| | beta = likelihood_computer.ComputeBackwardMessage(cost, beta); |
| | sel_prob_map.Set(row, col, image_idx, beta); |
| | } |
| |
|
| | |
| | forward_message[image_idx] = kUniformProb; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | typedef PhotoConsistencyCostComputer<kWindowSize, kWindowStep> |
| | PhotoConsistencyCostComputerType; |
| | PhotoConsistencyCostComputerType pcc_computer(options.sigma_spatial, |
| | options.sigma_color); |
| | pcc_computer.col = col; |
| |
|
| | __shared__ float local_ref_image_data |
| | [PhotoConsistencyCostComputerType::LocalRefImageType::kDataSize]; |
| | pcc_computer.local_ref_image.data = &local_ref_image_data[0]; |
| |
|
| | struct ParamState { |
| | float depth = 0.0f; |
| | float normal[3] = {0}; |
| | }; |
| |
|
| | |
| | ParamState prev_param_state; |
| | |
| | ParamState curr_param_state; |
| | |
| | ParamState rand_param_state; |
| | |
| | curandState rand_state; |
| |
|
| | if (col < cost_map.GetWidth()) { |
| | |
| | rand_state = rand_state_map.Get(0, col); |
| | |
| | prev_param_state.depth = depth_map.Get(0, col); |
| | normal_map.GetSlice(0, col, prev_param_state.normal); |
| | } |
| |
|
| | for (int row = 0; row < cost_map.GetHeight(); ++row) { |
| | |
| | |
| | pcc_computer.Read(row); |
| |
|
| | if (col >= cost_map.GetWidth()) { |
| | continue; |
| | } |
| |
|
| | pcc_computer.row = row; |
| | pcc_computer.local_ref_sum = ref_sum_image.Get(row, col); |
| | pcc_computer.local_ref_squared_sum = ref_squared_sum_image.Get(row, col); |
| |
|
| | |
| | |
| | |
| | |
| | prev_param_state.depth = PropagateDepth( |
| | prev_param_state.depth, prev_param_state.normal, row - 1, row); |
| |
|
| | |
| | curr_param_state.depth = depth_map.Get(row, col); |
| | normal_map.GetSlice(row, col, curr_param_state.normal); |
| |
|
| | |
| | rand_param_state.depth = |
| | PerturbDepth(options.perturbation, curr_param_state.depth, &rand_state); |
| | PerturbNormal(row, col, options.perturbation * M_PI, |
| | curr_param_state.normal, &rand_state, |
| | rand_param_state.normal); |
| |
|
| | |
| | |
| |
|
| | float point[3]; |
| | ComputePointAtDepth(row, col, curr_param_state.depth, point); |
| |
|
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | const float cost = cost_map.Get(row, col, image_idx); |
| | const float alpha = likelihood_computer.ComputeForwardMessage( |
| | cost, forward_message[image_idx]); |
| | const float beta = sel_prob_map.Get(row, col, image_idx); |
| | const float prev_prob = prev_sel_prob_map.Get(row, col, image_idx); |
| | const float sel_prob = likelihood_computer.ComputeSelProb( |
| | alpha, beta, prev_prob, options.prev_sel_prob_weight); |
| |
|
| | float cos_triangulation_angle; |
| | float cos_incident_angle; |
| | ComputeViewingAngles(point, curr_param_state.normal, image_idx, |
| | &cos_triangulation_angle, &cos_incident_angle); |
| | const float tri_prob = |
| | likelihood_computer.ComputeTriProb(cos_triangulation_angle); |
| | const float inc_prob = |
| | likelihood_computer.ComputeIncProb(cos_incident_angle); |
| |
|
| | float H[9]; |
| | ComposeHomography(image_idx, row, col, curr_param_state.depth, |
| | curr_param_state.normal, H); |
| | const float res_prob = |
| | likelihood_computer.ComputeResolutionProb<kWindowSize>(H, row, col); |
| |
|
| | sampling_probs[image_idx] = sel_prob * tri_prob * inc_prob * res_prob; |
| | } |
| |
|
| | TransformPDFToCDF(sampling_probs, cost_map.GetDepth()); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | constexpr int kNumCosts = 5; |
| | float costs[kNumCosts] = {0}; |
| | const float depths[kNumCosts] = { |
| | curr_param_state.depth, prev_param_state.depth, rand_param_state.depth, |
| | curr_param_state.depth, rand_param_state.depth}; |
| | const float* normals[kNumCosts] = { |
| | curr_param_state.normal, prev_param_state.normal, |
| | rand_param_state.normal, rand_param_state.normal, |
| | curr_param_state.normal}; |
| |
|
| | for (int sample = 0; sample < options.num_samples; ++sample) { |
| | const float rand_prob = curand_uniform(&rand_state) - FLT_EPSILON; |
| |
|
| | pcc_computer.src_image_idx = -1; |
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | const float prob = sampling_probs[image_idx]; |
| | if (prob > rand_prob) { |
| | pcc_computer.src_image_idx = image_idx; |
| | break; |
| | } |
| | } |
| |
|
| | if (pcc_computer.src_image_idx == -1) { |
| | continue; |
| | } |
| |
|
| | costs[0] += cost_map.Get(row, col, pcc_computer.src_image_idx); |
| | if (kGeomConsistencyTerm) { |
| | costs[0] += options.geom_consistency_regularizer * |
| | ComputeGeomConsistencyCost( |
| | row, col, depths[0], pcc_computer.src_image_idx, |
| | options.geom_consistency_max_cost); |
| | } |
| |
|
| | for (int i = 1; i < kNumCosts; ++i) { |
| | pcc_computer.depth = depths[i]; |
| | pcc_computer.normal = normals[i]; |
| | costs[i] += pcc_computer.Compute(); |
| | if (kGeomConsistencyTerm) { |
| | costs[i] += options.geom_consistency_regularizer * |
| | ComputeGeomConsistencyCost( |
| | row, col, depths[i], pcc_computer.src_image_idx, |
| | options.geom_consistency_max_cost); |
| | } |
| | } |
| | } |
| |
|
| | |
| | const int min_cost_idx = FindMinCost<kNumCosts>(costs); |
| | const float best_depth = depths[min_cost_idx]; |
| | const float* best_normal = normals[min_cost_idx]; |
| |
|
| | |
| | depth_map.Set(row, col, best_depth); |
| | normal_map.SetSlice(row, col, best_normal); |
| |
|
| | |
| | |
| | pcc_computer.depth = best_depth; |
| | pcc_computer.normal = best_normal; |
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | |
| | float cost; |
| | if (min_cost_idx == 0) { |
| | cost = cost_map.Get(row, col, image_idx); |
| | } else { |
| | pcc_computer.src_image_idx = image_idx; |
| | cost = pcc_computer.Compute(); |
| | cost_map.Set(row, col, image_idx, cost); |
| | } |
| |
|
| | const float alpha = likelihood_computer.ComputeForwardMessage( |
| | cost, forward_message[image_idx]); |
| | const float beta = sel_prob_map.Get(row, col, image_idx); |
| | const float prev_prob = prev_sel_prob_map.Get(row, col, image_idx); |
| | const float prob = likelihood_computer.ComputeSelProb( |
| | alpha, beta, prev_prob, options.prev_sel_prob_weight); |
| | forward_message[image_idx] = alpha; |
| | sel_prob_map.Set(row, col, image_idx, prob); |
| | } |
| |
|
| | if (kFilterPhotoConsistency || kFilterGeomConsistency) { |
| | int num_consistent = 0; |
| |
|
| | float best_point[3]; |
| | ComputePointAtDepth(row, col, best_depth, best_point); |
| |
|
| | const float min_ncc_prob = |
| | likelihood_computer.ComputeNCCProb(1.0f - options.filter_min_ncc); |
| | const float cos_min_triangulation_angle = |
| | cos(options.filter_min_triangulation_angle); |
| |
|
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | float cos_triangulation_angle; |
| | float cos_incident_angle; |
| | ComputeViewingAngles(best_point, best_normal, image_idx, |
| | &cos_triangulation_angle, &cos_incident_angle); |
| | if (cos_triangulation_angle > cos_min_triangulation_angle || |
| | cos_incident_angle <= 0.0f) { |
| | continue; |
| | } |
| |
|
| | if (!kFilterGeomConsistency) { |
| | if (sel_prob_map.Get(row, col, image_idx) >= min_ncc_prob) { |
| | consistency_mask.Set(row, col, image_idx, 1); |
| | num_consistent += 1; |
| | } |
| | } else if (!kFilterPhotoConsistency) { |
| | if (ComputeGeomConsistencyCost(row, col, best_depth, image_idx, |
| | options.geom_consistency_max_cost) <= |
| | options.filter_geom_consistency_max_cost) { |
| | consistency_mask.Set(row, col, image_idx, 1); |
| | num_consistent += 1; |
| | } |
| | } else { |
| | if (sel_prob_map.Get(row, col, image_idx) >= min_ncc_prob && |
| | ComputeGeomConsistencyCost(row, col, best_depth, image_idx, |
| | options.geom_consistency_max_cost) <= |
| | options.filter_geom_consistency_max_cost) { |
| | consistency_mask.Set(row, col, image_idx, 1); |
| | num_consistent += 1; |
| | } |
| | } |
| | } |
| |
|
| | if (num_consistent < options.filter_min_num_consistent) { |
| | depth_map.Set(row, col, 0.0f); |
| | normal_map.Set(row, col, 0, 0.0f); |
| | normal_map.Set(row, col, 1, 0.0f); |
| | normal_map.Set(row, col, 2, 0.0f); |
| | for (int image_idx = 0; image_idx < cost_map.GetDepth(); ++image_idx) { |
| | consistency_mask.Set(row, col, image_idx, 0); |
| | } |
| | } |
| | } |
| |
|
| | |
| | prev_param_state.depth = best_depth; |
| | for (int i = 0; i < 3; ++i) { |
| | prev_param_state.normal[i] = best_normal[i]; |
| | } |
| | } |
| |
|
| | if (col < cost_map.GetWidth()) { |
| | rand_state_map.Set(0, col, rand_state); |
| | } |
| | } |
| |
|
| | PatchMatchCuda::PatchMatchCuda(const PatchMatchOptions& options, |
| | const PatchMatch::Problem& problem) |
| | : options_(options), |
| | problem_(problem), |
| | ref_width_(0), |
| | ref_height_(0), |
| | rotation_in_half_pi_(0) { |
| | SetBestCudaDevice(std::stoi(options_.gpu_index)); |
| | InitRefImage(); |
| | InitSourceImages(); |
| | InitTransforms(); |
| | InitWorkspaceMemory(); |
| | } |
| |
|
| | PatchMatchCuda::~PatchMatchCuda() { |
| | for (size_t i = 0; i < 4; ++i) { |
| | poses_device_[i].reset(); |
| | } |
| | } |
| |
|
| | void PatchMatchCuda::Run() { |
| | #define CASE_WINDOW_RADIUS(window_radius, window_step) \ |
| | case window_radius: \ |
| | RunWithWindowSizeAndStep<2 * window_radius + 1, window_step>(); \ |
| | break; |
| |
|
| | #define CASE_WINDOW_STEP(window_step) \ |
| | case window_step: \ |
| | switch (options_.window_radius) { \ |
| | CASE_WINDOW_RADIUS(1, window_step) \ |
| | CASE_WINDOW_RADIUS(2, window_step) \ |
| | CASE_WINDOW_RADIUS(3, window_step) \ |
| | CASE_WINDOW_RADIUS(4, window_step) \ |
| | CASE_WINDOW_RADIUS(5, window_step) \ |
| | CASE_WINDOW_RADIUS(6, window_step) \ |
| | CASE_WINDOW_RADIUS(7, window_step) \ |
| | CASE_WINDOW_RADIUS(8, window_step) \ |
| | CASE_WINDOW_RADIUS(9, window_step) \ |
| | CASE_WINDOW_RADIUS(10, window_step) \ |
| | CASE_WINDOW_RADIUS(11, window_step) \ |
| | CASE_WINDOW_RADIUS(12, window_step) \ |
| | CASE_WINDOW_RADIUS(13, window_step) \ |
| | CASE_WINDOW_RADIUS(14, window_step) \ |
| | CASE_WINDOW_RADIUS(15, window_step) \ |
| | CASE_WINDOW_RADIUS(16, window_step) \ |
| | CASE_WINDOW_RADIUS(17, window_step) \ |
| | CASE_WINDOW_RADIUS(18, window_step) \ |
| | CASE_WINDOW_RADIUS(19, window_step) \ |
| | CASE_WINDOW_RADIUS(20, window_step) \ |
| | default: { \ |
| | std::cerr << "Error: Window size not supported" << std::endl; \ |
| | break; \ |
| | } \ |
| | } \ |
| | break; |
| |
|
| | switch (options_.window_step) { |
| | CASE_WINDOW_STEP(1) |
| | CASE_WINDOW_STEP(2) |
| | default: { |
| | std::cerr << "Error: Window step not supported" << std::endl; |
| | break; |
| | } |
| | } |
| |
|
| | #undef SWITCH_WINDOW_RADIUS |
| | #undef CALL_RUN_FUNC |
| | } |
| |
|
| | DepthMap PatchMatchCuda::GetDepthMap() const { |
| | return DepthMap(depth_map_->CopyToMat(), options_.depth_min, |
| | options_.depth_max); |
| | } |
| |
|
| | NormalMap PatchMatchCuda::GetNormalMap() const { |
| | return NormalMap(normal_map_->CopyToMat()); |
| | } |
| |
|
| | Mat<float> PatchMatchCuda::GetSelProbMap() const { |
| | return prev_sel_prob_map_->CopyToMat(); |
| | } |
| |
|
| | std::vector<int> PatchMatchCuda::GetConsistentImageIdxs() const { |
| | const Mat<uint8_t> mask = consistency_mask_->CopyToMat(); |
| | std::vector<int> consistent_image_idxs; |
| | std::vector<int> pixel_consistent_image_idxs; |
| | pixel_consistent_image_idxs.reserve(mask.GetDepth()); |
| | for (size_t r = 0; r < mask.GetHeight(); ++r) { |
| | for (size_t c = 0; c < mask.GetWidth(); ++c) { |
| | pixel_consistent_image_idxs.clear(); |
| | for (size_t d = 0; d < mask.GetDepth(); ++d) { |
| | if (mask.Get(r, c, d)) { |
| | pixel_consistent_image_idxs.push_back(problem_.src_image_idxs[d]); |
| | } |
| | } |
| | if (pixel_consistent_image_idxs.size() > 0) { |
| | consistent_image_idxs.push_back(c); |
| | consistent_image_idxs.push_back(r); |
| | consistent_image_idxs.push_back(pixel_consistent_image_idxs.size()); |
| | consistent_image_idxs.insert(consistent_image_idxs.end(), |
| | pixel_consistent_image_idxs.begin(), |
| | pixel_consistent_image_idxs.end()); |
| | } |
| | } |
| | } |
| | return consistent_image_idxs; |
| | } |
| |
|
| | template <int kWindowSize, int kWindowStep> |
| | void PatchMatchCuda::RunWithWindowSizeAndStep() { |
| | |
| | CUDA_SYNC_AND_CHECK(); |
| |
|
| | CudaTimer total_timer; |
| | CudaTimer init_timer; |
| |
|
| | ComputeCudaConfig(); |
| | ComputeInitialCost<kWindowSize, kWindowStep> |
| | <<<sweep_grid_size_, sweep_block_size_>>>( |
| | *cost_map_, *depth_map_, *normal_map_, *ref_image_->sum_image, |
| | *ref_image_->squared_sum_image, options_.sigma_spatial, |
| | options_.sigma_color); |
| | CUDA_SYNC_AND_CHECK(); |
| |
|
| | init_timer.Print("Initialization"); |
| |
|
| | const float total_num_steps = options_.num_iterations * 4; |
| |
|
| | SweepOptions sweep_options; |
| | sweep_options.depth_min = options_.depth_min; |
| | sweep_options.depth_max = options_.depth_max; |
| | sweep_options.sigma_spatial = options_.sigma_spatial; |
| | sweep_options.sigma_color = options_.sigma_color; |
| | sweep_options.num_samples = options_.num_samples; |
| | sweep_options.ncc_sigma = options_.ncc_sigma; |
| | sweep_options.min_triangulation_angle = |
| | DEG2RAD(options_.min_triangulation_angle); |
| | sweep_options.incident_angle_sigma = options_.incident_angle_sigma; |
| | sweep_options.geom_consistency_regularizer = |
| | options_.geom_consistency_regularizer; |
| | sweep_options.geom_consistency_max_cost = options_.geom_consistency_max_cost; |
| | sweep_options.filter_min_ncc = options_.filter_min_ncc; |
| | sweep_options.filter_min_triangulation_angle = |
| | DEG2RAD(options_.filter_min_triangulation_angle); |
| | sweep_options.filter_min_num_consistent = options_.filter_min_num_consistent; |
| | sweep_options.filter_geom_consistency_max_cost = |
| | options_.filter_geom_consistency_max_cost; |
| |
|
| | for (int iter = 0; iter < options_.num_iterations; ++iter) { |
| | CudaTimer iter_timer; |
| |
|
| | for (int sweep = 0; sweep < 4; ++sweep) { |
| | CudaTimer sweep_timer; |
| |
|
| | |
| | sweep_options.perturbation = 1.0f / std::pow(2.0f, iter + sweep / 4.0f); |
| |
|
| | |
| | sweep_options.prev_sel_prob_weight = |
| | static_cast<float>(iter * 4 + sweep) / total_num_steps; |
| |
|
| | const bool last_sweep = iter == options_.num_iterations - 1 && sweep == 3; |
| |
|
| | #define CALL_SWEEP_FUNC \ |
| | SweepFromTopToBottom<kWindowSize, kWindowStep, kGeomConsistencyTerm, \ |
| | kFilterPhotoConsistency, kFilterGeomConsistency> \ |
| | <<<sweep_grid_size_, sweep_block_size_>>>( \ |
| | *global_workspace_, *rand_state_map_, *cost_map_, *depth_map_, \ |
| | *normal_map_, *consistency_mask_, *sel_prob_map_, \ |
| | *prev_sel_prob_map_, *ref_image_->sum_image, \ |
| | *ref_image_->squared_sum_image, sweep_options); |
| |
|
| | if (last_sweep) { |
| | if (options_.filter) { |
| | consistency_mask_.reset(new GpuMat<uint8_t>(cost_map_->GetWidth(), |
| | cost_map_->GetHeight(), |
| | cost_map_->GetDepth())); |
| | consistency_mask_->FillWithScalar(0); |
| | } |
| | if (options_.geom_consistency) { |
| | const bool kGeomConsistencyTerm = true; |
| | if (options_.filter) { |
| | const bool kFilterPhotoConsistency = true; |
| | const bool kFilterGeomConsistency = true; |
| | CALL_SWEEP_FUNC |
| | } else { |
| | const bool kFilterPhotoConsistency = false; |
| | const bool kFilterGeomConsistency = false; |
| | CALL_SWEEP_FUNC |
| | } |
| | } else { |
| | const bool kGeomConsistencyTerm = false; |
| | if (options_.filter) { |
| | const bool kFilterPhotoConsistency = true; |
| | const bool kFilterGeomConsistency = false; |
| | CALL_SWEEP_FUNC |
| | } else { |
| | const bool kFilterPhotoConsistency = false; |
| | const bool kFilterGeomConsistency = false; |
| | CALL_SWEEP_FUNC |
| | } |
| | } |
| | } else { |
| | const bool kFilterPhotoConsistency = false; |
| | const bool kFilterGeomConsistency = false; |
| | if (options_.geom_consistency) { |
| | const bool kGeomConsistencyTerm = true; |
| | CALL_SWEEP_FUNC |
| | } else { |
| | const bool kGeomConsistencyTerm = false; |
| | CALL_SWEEP_FUNC |
| | } |
| | } |
| |
|
| | #undef CALL_SWEEP_FUNC |
| |
|
| | CUDA_SYNC_AND_CHECK(); |
| |
|
| | Rotate(); |
| |
|
| | |
| | if (last_sweep && options_.filter) { |
| | std::unique_ptr<GpuMat<uint8_t>> rot_consistency_mask_( |
| | new GpuMat<uint8_t>(cost_map_->GetWidth(), cost_map_->GetHeight(), |
| | cost_map_->GetDepth())); |
| | consistency_mask_->Rotate(rot_consistency_mask_.get()); |
| | consistency_mask_.swap(rot_consistency_mask_); |
| | } |
| |
|
| | sweep_timer.Print(" Sweep " + std::to_string(sweep + 1)); |
| | } |
| |
|
| | iter_timer.Print("Iteration " + std::to_string(iter + 1)); |
| | } |
| |
|
| | total_timer.Print("Total"); |
| | } |
| |
|
| | void PatchMatchCuda::ComputeCudaConfig() { |
| | sweep_block_size_.x = THREADS_PER_BLOCK; |
| | sweep_block_size_.y = 1; |
| | sweep_block_size_.z = 1; |
| | sweep_grid_size_.x = (depth_map_->GetWidth() - 1) / THREADS_PER_BLOCK + 1; |
| | sweep_grid_size_.y = 1; |
| | sweep_grid_size_.z = 1; |
| |
|
| | elem_wise_block_size_.x = THREADS_PER_BLOCK; |
| | elem_wise_block_size_.y = THREADS_PER_BLOCK; |
| | elem_wise_block_size_.z = 1; |
| | elem_wise_grid_size_.x = (depth_map_->GetWidth() - 1) / THREADS_PER_BLOCK + 1; |
| | elem_wise_grid_size_.y = |
| | (depth_map_->GetHeight() - 1) / THREADS_PER_BLOCK + 1; |
| | elem_wise_grid_size_.z = 1; |
| | } |
| |
|
| | void PatchMatchCuda::InitRefImage() { |
| | const Image& ref_image = problem_.images->at(problem_.ref_image_idx); |
| |
|
| | ref_width_ = ref_image.GetWidth(); |
| | ref_height_ = ref_image.GetHeight(); |
| |
|
| | |
| | ref_image_.reset(new GpuMatRefImage(ref_width_, ref_height_)); |
| | const std::vector<uint8_t> ref_image_array = |
| | ref_image.GetBitmap().ConvertToRowMajorArray(); |
| | ref_image_->Filter(ref_image_array.data(), options_.window_radius, |
| | options_.window_step, options_.sigma_spatial, |
| | options_.sigma_color); |
| |
|
| | ref_image_device_.reset( |
| | new CudaArrayWrapper<uint8_t>(ref_width_, ref_height_, 1)); |
| | ref_image_device_->CopyFromGpuMat(*ref_image_->image); |
| |
|
| | |
| | ref_image_texture.addressMode[0] = cudaAddressModeBorder; |
| | ref_image_texture.addressMode[1] = cudaAddressModeBorder; |
| | ref_image_texture.addressMode[2] = cudaAddressModeBorder; |
| | ref_image_texture.filterMode = cudaFilterModePoint; |
| | ref_image_texture.normalized = false; |
| | CUDA_SAFE_CALL( |
| | cudaBindTextureToArray(ref_image_texture, ref_image_device_->GetPtr())); |
| | } |
| |
|
| | void PatchMatchCuda::InitSourceImages() { |
| | |
| | size_t max_width = 0; |
| | size_t max_height = 0; |
| | for (const auto image_idx : problem_.src_image_idxs) { |
| | const Image& image = problem_.images->at(image_idx); |
| | if (image.GetWidth() > max_width) { |
| | max_width = image.GetWidth(); |
| | } |
| | if (image.GetHeight() > max_height) { |
| | max_height = image.GetHeight(); |
| | } |
| | } |
| |
|
| | |
| | { |
| | |
| | const uint8_t kDefaultValue = 0; |
| | std::vector<uint8_t> src_images_host_data( |
| | static_cast<size_t>(max_width * max_height * |
| | problem_.src_image_idxs.size()), |
| | kDefaultValue); |
| | for (size_t i = 0; i < problem_.src_image_idxs.size(); ++i) { |
| | const Image& image = problem_.images->at(problem_.src_image_idxs[i]); |
| | const Bitmap& bitmap = image.GetBitmap(); |
| | uint8_t* dest = src_images_host_data.data() + max_width * max_height * i; |
| | for (size_t r = 0; r < image.GetHeight(); ++r) { |
| | memcpy(dest, bitmap.GetScanline(r), image.GetWidth() * sizeof(uint8_t)); |
| | dest += max_width; |
| | } |
| | } |
| |
|
| | |
| | src_images_device_.reset(new CudaArrayWrapper<uint8_t>( |
| | max_width, max_height, problem_.src_image_idxs.size())); |
| | src_images_device_->CopyToDevice(src_images_host_data.data()); |
| |
|
| | |
| | src_images_texture.addressMode[0] = cudaAddressModeBorder; |
| | src_images_texture.addressMode[1] = cudaAddressModeBorder; |
| | src_images_texture.addressMode[2] = cudaAddressModeBorder; |
| | src_images_texture.filterMode = cudaFilterModeLinear; |
| | src_images_texture.normalized = false; |
| | CUDA_SAFE_CALL(cudaBindTextureToArray(src_images_texture, |
| | src_images_device_->GetPtr())); |
| | } |
| |
|
| | |
| | if (options_.geom_consistency) { |
| | const float kDefaultValue = 0.0f; |
| | std::vector<float> src_depth_maps_host_data( |
| | static_cast<size_t>(max_width * max_height * |
| | problem_.src_image_idxs.size()), |
| | kDefaultValue); |
| | for (size_t i = 0; i < problem_.src_image_idxs.size(); ++i) { |
| | const DepthMap& depth_map = |
| | problem_.depth_maps->at(problem_.src_image_idxs[i]); |
| | float* dest = |
| | src_depth_maps_host_data.data() + max_width * max_height * i; |
| | for (size_t r = 0; r < depth_map.GetHeight(); ++r) { |
| | memcpy(dest, depth_map.GetPtr() + r * depth_map.GetWidth(), |
| | depth_map.GetWidth() * sizeof(float)); |
| | dest += max_width; |
| | } |
| | } |
| |
|
| | src_depth_maps_device_.reset(new CudaArrayWrapper<float>( |
| | max_width, max_height, problem_.src_image_idxs.size())); |
| | src_depth_maps_device_->CopyToDevice(src_depth_maps_host_data.data()); |
| |
|
| | |
| | src_depth_maps_texture.addressMode[0] = cudaAddressModeBorder; |
| | src_depth_maps_texture.addressMode[1] = cudaAddressModeBorder; |
| | src_depth_maps_texture.addressMode[2] = cudaAddressModeBorder; |
| | |
| | src_depth_maps_texture.filterMode = cudaFilterModePoint; |
| | src_depth_maps_texture.normalized = false; |
| | CUDA_SAFE_CALL(cudaBindTextureToArray(src_depth_maps_texture, |
| | src_depth_maps_device_->GetPtr())); |
| | } |
| | } |
| |
|
| | void PatchMatchCuda::InitTransforms() { |
| | const Image& ref_image = problem_.images->at(problem_.ref_image_idx); |
| |
|
| | |
| | |
| | |
| |
|
| | for (size_t i = 0; i < 4; ++i) { |
| | ref_K_host_[i][0] = ref_image.GetK()[0]; |
| | ref_K_host_[i][1] = ref_image.GetK()[2]; |
| | ref_K_host_[i][2] = ref_image.GetK()[4]; |
| | ref_K_host_[i][3] = ref_image.GetK()[5]; |
| | } |
| |
|
| | |
| | std::swap(ref_K_host_[1][0], ref_K_host_[1][2]); |
| | std::swap(ref_K_host_[1][1], ref_K_host_[1][3]); |
| | ref_K_host_[1][3] = ref_width_ - 1 - ref_K_host_[1][3]; |
| |
|
| | |
| | ref_K_host_[2][1] = ref_width_ - 1 - ref_K_host_[2][1]; |
| | ref_K_host_[2][3] = ref_height_ - 1 - ref_K_host_[2][3]; |
| |
|
| | |
| | std::swap(ref_K_host_[3][0], ref_K_host_[3][2]); |
| | std::swap(ref_K_host_[3][1], ref_K_host_[3][3]); |
| | ref_K_host_[3][1] = ref_height_ - 1 - ref_K_host_[3][1]; |
| |
|
| | |
| | for (size_t i = 0; i < 4; ++i) { |
| | ref_inv_K_host_[i][0] = 1.0f / ref_K_host_[i][0]; |
| | ref_inv_K_host_[i][1] = -ref_K_host_[i][1] / ref_K_host_[i][0]; |
| | ref_inv_K_host_[i][2] = 1.0f / ref_K_host_[i][2]; |
| | ref_inv_K_host_[i][3] = -ref_K_host_[i][3] / ref_K_host_[i][2]; |
| | } |
| |
|
| | |
| | CUDA_SAFE_CALL(cudaMemcpyToSymbol(ref_K, ref_K_host_[0], sizeof(float) * 4, 0, |
| | cudaMemcpyHostToDevice)); |
| | CUDA_SAFE_CALL(cudaMemcpyToSymbol(ref_inv_K, ref_inv_K_host_[0], |
| | sizeof(float) * 4, 0, |
| | cudaMemcpyHostToDevice)); |
| |
|
| | |
| | |
| | |
| |
|
| | float rotated_R[9]; |
| | memcpy(rotated_R, ref_image.GetR(), 9 * sizeof(float)); |
| |
|
| | float rotated_T[3]; |
| | memcpy(rotated_T, ref_image.GetT(), 3 * sizeof(float)); |
| |
|
| | |
| | const float R_z90[9] = {0, 1, 0, -1, 0, 0, 0, 0, 1}; |
| |
|
| | for (size_t i = 0; i < 4; ++i) { |
| | const size_t kNumTformParams = 4 + 9 + 3 + 3 + 12 + 12; |
| | std::vector<float> poses_host_data(kNumTformParams * |
| | problem_.src_image_idxs.size()); |
| | int offset = 0; |
| | for (const auto image_idx : problem_.src_image_idxs) { |
| | const Image& image = problem_.images->at(image_idx); |
| |
|
| | const float K[4] = {image.GetK()[0], image.GetK()[2], image.GetK()[4], |
| | image.GetK()[5]}; |
| | memcpy(poses_host_data.data() + offset, K, 4 * sizeof(float)); |
| | offset += 4; |
| |
|
| | float rel_R[9]; |
| | float rel_T[3]; |
| | ComputeRelativePose(rotated_R, rotated_T, image.GetR(), image.GetT(), |
| | rel_R, rel_T); |
| | memcpy(poses_host_data.data() + offset, rel_R, 9 * sizeof(float)); |
| | offset += 9; |
| | memcpy(poses_host_data.data() + offset, rel_T, 3 * sizeof(float)); |
| | offset += 3; |
| |
|
| | float C[3]; |
| | ComputeProjectionCenter(rel_R, rel_T, C); |
| | memcpy(poses_host_data.data() + offset, C, 3 * sizeof(float)); |
| | offset += 3; |
| |
|
| | float P[12]; |
| | ComposeProjectionMatrix(image.GetK(), rel_R, rel_T, P); |
| | memcpy(poses_host_data.data() + offset, P, 12 * sizeof(float)); |
| | offset += 12; |
| |
|
| | float inv_P[12]; |
| | ComposeInverseProjectionMatrix(image.GetK(), rel_R, rel_T, inv_P); |
| | memcpy(poses_host_data.data() + offset, inv_P, 12 * sizeof(float)); |
| | offset += 12; |
| | } |
| |
|
| | poses_device_[i].reset(new CudaArrayWrapper<float>( |
| | kNumTformParams, problem_.src_image_idxs.size(), 1)); |
| | poses_device_[i]->CopyToDevice(poses_host_data.data()); |
| |
|
| | RotatePose(R_z90, rotated_R, rotated_T); |
| | } |
| |
|
| | poses_texture.addressMode[0] = cudaAddressModeBorder; |
| | poses_texture.addressMode[1] = cudaAddressModeBorder; |
| | poses_texture.addressMode[2] = cudaAddressModeBorder; |
| | poses_texture.filterMode = cudaFilterModePoint; |
| | poses_texture.normalized = false; |
| | CUDA_SAFE_CALL( |
| | cudaBindTextureToArray(poses_texture, poses_device_[0]->GetPtr())); |
| | } |
| |
|
| | void PatchMatchCuda::InitWorkspaceMemory() { |
| | rand_state_map_.reset(new GpuMatPRNG(ref_width_, ref_height_)); |
| |
|
| | depth_map_.reset(new GpuMat<float>(ref_width_, ref_height_)); |
| | if (options_.geom_consistency) { |
| | const DepthMap& init_depth_map = |
| | problem_.depth_maps->at(problem_.ref_image_idx); |
| | depth_map_->CopyToDevice(init_depth_map.GetPtr(), |
| | init_depth_map.GetWidth() * sizeof(float)); |
| | } else { |
| | depth_map_->FillWithRandomNumbers(options_.depth_min, options_.depth_max, |
| | *rand_state_map_); |
| | } |
| |
|
| | normal_map_.reset(new GpuMat<float>(ref_width_, ref_height_, 3)); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | sel_prob_map_.reset(new GpuMat<float>(ref_width_, ref_height_, |
| | problem_.src_image_idxs.size())); |
| | prev_sel_prob_map_.reset(new GpuMat<float>(ref_width_, ref_height_, |
| | problem_.src_image_idxs.size())); |
| | prev_sel_prob_map_->FillWithScalar(0.5f); |
| |
|
| | cost_map_.reset(new GpuMat<float>(ref_width_, ref_height_, |
| | problem_.src_image_idxs.size())); |
| |
|
| | const int ref_max_dim = std::max(ref_width_, ref_height_); |
| | global_workspace_.reset( |
| | new GpuMat<float>(ref_max_dim, problem_.src_image_idxs.size(), 2)); |
| |
|
| | consistency_mask_.reset(new GpuMat<uint8_t>(0, 0, 0)); |
| |
|
| | ComputeCudaConfig(); |
| |
|
| | if (options_.geom_consistency) { |
| | const NormalMap& init_normal_map = |
| | problem_.normal_maps->at(problem_.ref_image_idx); |
| | normal_map_->CopyToDevice(init_normal_map.GetPtr(), |
| | init_normal_map.GetWidth() * sizeof(float)); |
| | } else { |
| | InitNormalMap<<<elem_wise_grid_size_, elem_wise_block_size_>>>( |
| | *normal_map_, *rand_state_map_); |
| | } |
| | } |
| |
|
| | void PatchMatchCuda::Rotate() { |
| | rotation_in_half_pi_ = (rotation_in_half_pi_ + 1) % 4; |
| |
|
| | size_t width; |
| | size_t height; |
| | if (rotation_in_half_pi_ % 2 == 0) { |
| | width = ref_width_; |
| | height = ref_height_; |
| | } else { |
| | width = ref_height_; |
| | height = ref_width_; |
| | } |
| |
|
| | |
| | { |
| | std::unique_ptr<GpuMatPRNG> rotated_rand_state_map( |
| | new GpuMatPRNG(width, height)); |
| | rand_state_map_->Rotate(rotated_rand_state_map.get()); |
| | rand_state_map_.swap(rotated_rand_state_map); |
| | } |
| |
|
| | |
| | { |
| | std::unique_ptr<GpuMat<float>> rotated_depth_map( |
| | new GpuMat<float>(width, height)); |
| | depth_map_->Rotate(rotated_depth_map.get()); |
| | depth_map_.swap(rotated_depth_map); |
| | } |
| |
|
| | |
| | { |
| | RotateNormalMap<<<elem_wise_grid_size_, elem_wise_block_size_>>>( |
| | *normal_map_); |
| | std::unique_ptr<GpuMat<float>> rotated_normal_map( |
| | new GpuMat<float>(width, height, 3)); |
| | normal_map_->Rotate(rotated_normal_map.get()); |
| | normal_map_.swap(rotated_normal_map); |
| | } |
| |
|
| | |
| | { |
| | std::unique_ptr<GpuMatRefImage> rotated_ref_image( |
| | new GpuMatRefImage(width, height)); |
| | ref_image_->image->Rotate(rotated_ref_image->image.get()); |
| | ref_image_->sum_image->Rotate(rotated_ref_image->sum_image.get()); |
| | ref_image_->squared_sum_image->Rotate( |
| | rotated_ref_image->squared_sum_image.get()); |
| | ref_image_.swap(rotated_ref_image); |
| | } |
| |
|
| | |
| | ref_image_device_.reset(new CudaArrayWrapper<uint8_t>(width, height, 1)); |
| | ref_image_device_->CopyFromGpuMat(*ref_image_->image); |
| | CUDA_SAFE_CALL(cudaUnbindTexture(ref_image_texture)); |
| | CUDA_SAFE_CALL( |
| | cudaBindTextureToArray(ref_image_texture, ref_image_device_->GetPtr())); |
| |
|
| | |
| | prev_sel_prob_map_.reset( |
| | new GpuMat<float>(width, height, problem_.src_image_idxs.size())); |
| | sel_prob_map_->Rotate(prev_sel_prob_map_.get()); |
| | sel_prob_map_.reset( |
| | new GpuMat<float>(width, height, problem_.src_image_idxs.size())); |
| |
|
| | |
| | { |
| | std::unique_ptr<GpuMat<float>> rotated_cost_map( |
| | new GpuMat<float>(width, height, problem_.src_image_idxs.size())); |
| | cost_map_->Rotate(rotated_cost_map.get()); |
| | cost_map_.swap(rotated_cost_map); |
| | } |
| |
|
| | |
| | CUDA_SAFE_CALL(cudaUnbindTexture(poses_texture)); |
| | CUDA_SAFE_CALL(cudaBindTextureToArray( |
| | poses_texture, poses_device_[rotation_in_half_pi_]->GetPtr())); |
| |
|
| | |
| | CUDA_SAFE_CALL(cudaMemcpyToSymbol(ref_K, ref_K_host_[rotation_in_half_pi_], |
| | sizeof(float) * 4, 0, |
| | cudaMemcpyHostToDevice)); |
| | CUDA_SAFE_CALL( |
| | cudaMemcpyToSymbol(ref_inv_K, ref_inv_K_host_[rotation_in_half_pi_], |
| | sizeof(float) * 4, 0, cudaMemcpyHostToDevice)); |
| |
|
| | |
| | ComputeCudaConfig(); |
| | } |
| |
|
| | } |
| | } |
| |
|