Spaces:
Runtime error
Runtime error
| # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """ | |
| This code is adapted from: | |
| https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/fcenet_targets.py | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from numpy.fft import fft | |
| from numpy.linalg import norm | |
| import sys | |
def vector_slope(vec):
    """Return the absolute slope ``|dy / dx|`` of a 2-element vector.

    Args:
        vec (sequence): A vector with exactly two components ``(dx, dy)``.

    Returns:
        The absolute value of ``dy / (dx + 1e-8)``; the small constant keeps
        the division defined when ``dx`` is zero.
    """
    assert len(vec) == 2
    run = vec[0] + 1e-8
    rise = vec[1]
    return abs(rise / run)
class FCENetTargets:
    """Generate the ground truth targets of FCENet: Fourier Contour Embedding
    for Arbitrary-Shaped Text Detection.

    [https://arxiv.org/abs/2104.10442]

    Args:
        fourier_degree (int): The maximum Fourier transform degree k.
        resample_step (float): The step size for resampling the text center
            line (TCL). It's better not to exceed half of the minimum width.
            Integer values are accepted and converted to float.
        center_region_shrink_ratio (float): The shrink ratio of text center
            region.
        level_size_divisors (tuple(int)): The downsample ratio on each level.
        level_proportion_range (tuple(tuple(int))): The range of text sizes
            assigned to each level.
        orientation_thr (float): The threshold used by ``find_head_tail`` to
            decide between horizontal and vertical edges of a quadrangle.
            Integer values are accepted and converted to float.
        **kwargs: Accepted and ignored.
    """

    def __init__(self,
                 fourier_degree=5,
                 resample_step=4.0,
                 center_region_shrink_ratio=0.3,
                 level_size_divisors=(8, 16, 32),
                 level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0)),
                 orientation_thr=2.0,
                 **kwargs):
        super().__init__()
        assert isinstance(level_size_divisors, tuple)
        assert isinstance(level_proportion_range, tuple)
        assert len(level_size_divisors) == len(level_proportion_range)
        self.fourier_degree = fourier_degree
        # resample_sidelines() and find_head_tail() assert that these two
        # values are floats; coerce here so that integer configuration values
        # (e.g. ``resample_step: 4``) do not fail later with an AssertionError.
        self.resample_step = float(resample_step)
        self.center_region_shrink_ratio = center_region_shrink_ratio
        self.level_size_divisors = level_size_divisors
        self.level_proportion_range = level_proportion_range
        self.orientation_thr = float(orientation_thr)

    def vector_angle(self, vec1, vec2):
        """Compute the angle (in radians) between two vectors or vector sets.

        Args:
            vec1 (ndarray): A vector, or an array of vectors along the last
                axis.
            vec2 (ndarray): A vector, or an array of vectors along the last
                axis.

        Returns:
            The arccos of the clipped dot product of the normalized inputs.
        """
        # 1e-8 guards against division by zero for zero-length vectors.
        if vec1.ndim > 1:
            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
        else:
            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
        if vec2.ndim > 1:
            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
        else:
            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
        # Clip so rounding error cannot push the cosine outside [-1, 1].
        return np.arccos(
            np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))

    def resample_line(self, line, n):
        """Resample n points on a line.

        Args:
            line (ndarray): The points composing a line.
            n (int): The resampled points number.

        Returns:
            resampled_line (ndarray): The points composing the resampled line.
                The first and last input points are always included, so the
                result normally holds ``n + 1`` points.
        """
        assert line.ndim == 2
        assert line.shape[0] >= 2
        assert line.shape[1] == 2
        assert isinstance(n, int)
        assert n > 0

        length_list = [
            norm(line[i + 1] - line[i]) for i in range(len(line) - 1)
        ]
        total_length = sum(length_list)
        length_cumsum = np.cumsum([0.0] + length_list)
        delta_length = total_length / (float(n) + 1e-8)

        current_edge_ind = 0
        resampled_line = [line[0]]

        for i in range(1, n):
            current_line_len = i * delta_length

            # Advance to the edge that contains the target arc length.
            while (current_edge_ind + 1 < len(length_cumsum) and
                   current_line_len >= length_cumsum[current_edge_ind + 1]):
                current_edge_ind += 1

            current_edge_end_shift = (
                current_line_len - length_cumsum[current_edge_ind])

            # Ran past the last edge (e.g. a zero-length line): stop early.
            if current_edge_ind >= len(length_list):
                break
            end_shift_ratio = (
                current_edge_end_shift / length_list[current_edge_ind])
            current_point = line[current_edge_ind] + (
                line[current_edge_ind + 1] - line[current_edge_ind]
            ) * end_shift_ratio
            resampled_line.append(current_point)
        resampled_line.append(line[-1])
        resampled_line = np.array(resampled_line)

        return resampled_line

    def reorder_poly_edge(self, points):
        """Get the respective points composing head edge, tail edge, top
        sideline and bottom sideline.

        Args:
            points (ndarray): The points composing a text polygon.

        Returns:
            head_edge (ndarray): The two points composing the head edge of text
                polygon.
            tail_edge (ndarray): The two points composing the tail edge of text
                polygon.
            top_sideline (ndarray): The points composing top curved sideline of
                text polygon.
            bot_sideline (ndarray): The points composing bottom curved sideline
                of text polygon.
        """
        assert points.ndim == 2
        assert points.shape[0] >= 4
        assert points.shape[1] == 2

        head_inds, tail_inds = self.find_head_tail(points,
                                                   self.orientation_thr)
        head_edge, tail_edge = points[head_inds], points[tail_inds]

        # Duplicate the points so sidelines that wrap around can be sliced.
        pad_points = np.vstack([points, points])
        if tail_inds[1] < 1:
            tail_inds[1] = len(points)
        sideline1 = pad_points[head_inds[1]:tail_inds[1]]
        sideline2 = pad_points[tail_inds[1]:(head_inds[1] + len(points))]
        sideline_mean_shift = np.mean(
            sideline1, axis=0) - np.mean(
                sideline2, axis=0)

        # The sideline with the smaller mean y is taken as the top one.
        if sideline_mean_shift[1] > 0:
            top_sideline, bot_sideline = sideline2, sideline1
        else:
            top_sideline, bot_sideline = sideline1, sideline2

        return head_edge, tail_edge, top_sideline, bot_sideline

    def find_head_tail(self, points, orientation_thr):
        """Find the head edge and tail edge of a text polygon.

        Args:
            points (ndarray): The points composing a text polygon.
            orientation_thr (float): The threshold for distinguishing between
                head edge and tail edge among the horizontal and vertical edges
                of a quadrangle.

        Returns:
            head_inds (list): The indexes of two points composing head edge.
            tail_inds (list): The indexes of two points composing tail edge.
        """
        assert points.ndim == 2
        assert points.shape[0] >= 4
        assert points.shape[1] == 2
        assert isinstance(orientation_thr, float)

        if len(points) > 4:
            pad_points = np.vstack([points, points[0]])
            edge_vec = pad_points[1:] - pad_points[:-1]

            theta_sum = []
            adjacent_vec_theta = []
            for i, edge_vec1 in enumerate(edge_vec):
                adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
                adjacent_edge_vec = edge_vec[adjacent_ind]
                temp_theta_sum = np.sum(
                    self.vector_angle(edge_vec1, adjacent_edge_vec))
                temp_adjacent_theta = self.vector_angle(adjacent_edge_vec[0],
                                                        adjacent_edge_vec[1])
                theta_sum.append(temp_theta_sum)
                adjacent_vec_theta.append(temp_adjacent_theta)
            theta_sum_score = np.array(theta_sum) / np.pi
            adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
            poly_center = np.mean(points, axis=0)
            edge_dist = np.maximum(
                norm(pad_points[1:] - poly_center, axis=-1),
                norm(pad_points[:-1] - poly_center, axis=-1))
            dist_score = edge_dist / np.max(edge_dist)
            position_score = np.zeros(len(edge_vec))
            score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
            score += 0.35 * dist_score
            if len(points) % 2 == 0:
                position_score[(len(score) // 2 - 1)] += 1
                position_score[-1] += 1
            score += 0.1 * position_score
            pad_score = np.concatenate([score, score])
            score_matrix = np.zeros((len(score), len(score) - 3))
            x = np.arange(len(score) - 3) / float(len(score) - 4)
            gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power(
                (x - 0.5) / 0.5, 2.) / 2)
            gaussian = gaussian / np.max(gaussian)
            # Score every (head edge, tail edge) pair; the tail candidates
            # are the edges at offsets 2 .. len(score) - 2 from the head.
            for i in range(len(score)):
                score_matrix[i, :] = score[i] + pad_score[(i + 2):(
                    i + len(score) - 1)] * gaussian * 0.3

            head_start, tail_increment = np.unravel_index(
                score_matrix.argmax(), score_matrix.shape)
            tail_start = (head_start + tail_increment + 2) % len(points)
            head_end = (head_start + 1) % len(points)
            tail_end = (tail_start + 1) % len(points)

            if head_end > tail_end:
                head_start, tail_start = tail_start, head_start
                head_end, tail_end = tail_end, head_end
            head_inds = [head_start, head_end]
            tail_inds = [tail_start, tail_end]
        else:
            # Quadrangle: the edge pair with the smaller slope sum is treated
            # as the horizontal pair.
            if vector_slope(points[1] - points[0]) + vector_slope(
                    points[3] - points[2]) < vector_slope(
                        points[2] - points[1]) + vector_slope(
                            points[0] - points[3]):
                horizontal_edge_inds = [[0, 1], [2, 3]]
                vertical_edge_inds = [[3, 0], [1, 2]]
            else:
                horizontal_edge_inds = [[3, 0], [1, 2]]
                vertical_edge_inds = [[0, 1], [2, 3]]

            vertical_len_sum = norm(
                points[vertical_edge_inds[0][0]] -
                points[vertical_edge_inds[0][1]]) + norm(
                    points[vertical_edge_inds[1][0]] -
                    points[vertical_edge_inds[1][1]])
            horizontal_len_sum = norm(
                points[horizontal_edge_inds[0][0]] -
                points[horizontal_edge_inds[0][1]]) + norm(
                    points[horizontal_edge_inds[1][0]] -
                    points[horizontal_edge_inds[1][1]])

            if vertical_len_sum > horizontal_len_sum * orientation_thr:
                head_inds = horizontal_edge_inds[0]
                tail_inds = horizontal_edge_inds[1]
            else:
                head_inds = vertical_edge_inds[0]
                tail_inds = vertical_edge_inds[1]

        return head_inds, tail_inds

    def resample_sidelines(self, sideline1, sideline2, resample_step):
        """Resample two sidelines to be of the same points number according to
        step size.

        Args:
            sideline1 (ndarray): The points composing a sideline of a text
                polygon.
            sideline2 (ndarray): The points composing another sideline of a
                text polygon.
            resample_step (float): The resampled step size.

        Returns:
            resampled_line1 (ndarray): The resampled line 1.
            resampled_line2 (ndarray): The resampled line 2.
        """
        assert sideline1.ndim == sideline2.ndim == 2
        assert sideline1.shape[1] == sideline2.shape[1] == 2
        assert sideline1.shape[0] >= 2
        assert sideline2.shape[0] >= 2
        assert isinstance(resample_step, float)

        length1 = sum([
            norm(sideline1[i + 1] - sideline1[i])
            for i in range(len(sideline1) - 1)
        ])
        length2 = sum([
            norm(sideline2[i + 1] - sideline2[i])
            for i in range(len(sideline2) - 1)
        ])

        # Both lines share one point count derived from their mean length.
        total_length = (length1 + length2) / 2
        resample_point_num = max(int(float(total_length) / resample_step), 1)

        resampled_line1 = self.resample_line(sideline1, resample_point_num)
        resampled_line2 = self.resample_line(sideline2, resample_point_num)

        return resampled_line1, resampled_line2

    def generate_center_region_mask(self, img_size, text_polys):
        """Generate text center region mask.

        Args:
            img_size (tuple): The image size of (height, width).
            text_polys (list[ndarray]): The list of text polygons; each one is
                reshaped to (-1, 2).

        Returns:
            center_region_mask (ndarray): The text center region mask.
        """
        assert isinstance(img_size, tuple)

        h, w = img_size

        center_region_mask = np.zeros((h, w), np.uint8)

        center_region_boxes = []
        for poly in text_polys:
            polygon_points = poly.reshape(-1, 2)
            _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
            resampled_top_line, resampled_bot_line = self.resample_sidelines(
                top_line, bot_line, self.resample_step)
            resampled_bot_line = resampled_bot_line[::-1]
            # resample_line() may stop early on degenerate lines; skip the
            # polygon when the two sidelines cannot be paired point by point.
            if len(resampled_top_line) != len(resampled_bot_line):
                continue
            center_line = (resampled_top_line + resampled_bot_line) / 2

            line_head_shrink_len = norm(resampled_top_line[0] -
                                        resampled_bot_line[0]) / 4.0
            line_tail_shrink_len = norm(resampled_top_line[-1] -
                                        resampled_bot_line[-1]) / 4.0
            head_shrink_num = int(line_head_shrink_len // self.resample_step)
            tail_shrink_num = int(line_tail_shrink_len // self.resample_step)
            # Trim both ends only when enough points remain afterwards.
            if len(center_line) > head_shrink_num + tail_shrink_num + 2:
                center_line = center_line[head_shrink_num:len(center_line) -
                                          tail_shrink_num]
                resampled_top_line = resampled_top_line[head_shrink_num:len(
                    resampled_top_line) - tail_shrink_num]
                resampled_bot_line = resampled_bot_line[head_shrink_num:len(
                    resampled_bot_line) - tail_shrink_num]

            for i in range(0, len(center_line) - 1):
                tl = center_line[i] + (
                    resampled_top_line[i] - center_line[i]
                ) * self.center_region_shrink_ratio
                tr = center_line[i + 1] + (
                    resampled_top_line[i + 1] - center_line[i + 1]
                ) * self.center_region_shrink_ratio
                br = center_line[i + 1] + (
                    resampled_bot_line[i + 1] - center_line[i + 1]
                ) * self.center_region_shrink_ratio
                bl = center_line[i] + (
                    resampled_bot_line[i] - center_line[i]
                ) * self.center_region_shrink_ratio
                current_center_box = np.vstack([tl, tr, br,
                                                bl]).astype(np.int32)
                center_region_boxes.append(current_center_box)

        cv2.fillPoly(center_region_mask, center_region_boxes, 1)
        return center_region_mask

    def resample_polygon(self, polygon, n=400):
        """Resample one polygon with n points on its boundary.

        Args:
            polygon (ndarray): The input polygon as an array of (x, y) points.
            n (int): The number of resampled points.

        Returns:
            resampled_polygon (ndarray): The resampled polygon. Per-edge point
                counts are truncated to integers, so the result may contain
                fewer than ``n`` points.
        """
        length = []

        for i in range(len(polygon)):
            p1 = polygon[i]
            if i == len(polygon) - 1:
                p2 = polygon[0]
            else:
                p2 = polygon[i + 1]
            length.append(((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5)

        total_length = sum(length)
        # Distribute the points over the edges in proportion to edge length.
        n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n
        n_on_each_line = n_on_each_line.astype(np.int32)
        new_polygon = []

        for i in range(len(polygon)):
            num = n_on_each_line[i]
            p1 = polygon[i]
            if i == len(polygon) - 1:
                p2 = polygon[0]
            else:
                p2 = polygon[i + 1]

            if num == 0:
                continue

            dxdy = (p2 - p1) / num

            for j in range(num):
                point = p1 + dxdy * j
                new_polygon.append(point)

        return np.array(new_polygon)

    def normalize_polygon(self, polygon):
        """Rotate the point order of a polygon to a canonical start point.

        Among the 8 points whose x coordinate is closest to the polygon's
        mean x, the one with the smallest y is chosen as the new start point.

        Args:
            polygon (ndarray): The origin polygon as an array of (x, y) points.

        Returns:
            new_polygon (ndarray): The same points, cyclically shifted so that
                the chosen point comes first.
        """
        temp_polygon = polygon - polygon.mean(axis=0)
        x = np.abs(temp_polygon[:, 0])
        y = temp_polygon[:, 1]
        index_x = np.argsort(x)
        index_y = np.argmin(y[index_x[:8]])
        index = index_x[index_y]
        new_polygon = np.concatenate([polygon[index:], polygon[:index]])
        return new_polygon

    def poly2fourier(self, polygon, fourier_degree):
        """Perform Fourier transformation to generate Fourier coefficients ck
        from polygon.

        Args:
            polygon (ndarray): An input polygon.
            fourier_degree (int): The maximum Fourier degree K.

        Returns:
            c (ndarray(complex)): Fourier coefficients ordered from degree
                -K to +K (2K + 1 values, degree 0 at index K).
        """
        # Treat each (x, y) point as the complex number x + iy.
        points = polygon[:, 0] + polygon[:, 1] * 1j
        c_fft = fft(points) / len(points)
        c = np.hstack((c_fft[-fourier_degree:], c_fft[:fourier_degree + 1]))
        return c

    def clockwise(self, c, fourier_degree):
        """Make sure the polygon reconstructed from Fourier coefficients c in
        the clockwise direction.

        The magnitudes of the degree +1 and degree -1 coefficients are
        compared (degree +2 and -2 on a tie); the coefficient order is
        reversed when the negative-degree term dominates.

        Args:
            c (ndarray(complex)): Fourier coefficients ordered from degree
                -K to +K, as returned by ``poly2fourier``.
            fourier_degree (int): The maximum Fourier degree K.

        Returns:
            c (ndarray(complex)): The coefficients, possibly reversed.
        """
        if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]):
            return c
        elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]):
            return c[::-1]
        else:
            if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]):
                return c
            else:
                return c[::-1]

    def cal_fourier_signature(self, polygon, fourier_degree):
        """Calculate Fourier signature from input polygon.

        Args:
            polygon (ndarray): The input polygon.
            fourier_degree (int): The maximum Fourier degree K.

        Returns:
            fourier_signature (ndarray): An array shaped (2k+1, 2) containing
                real part and image part of 2k+1 Fourier coefficients.
        """
        resampled_polygon = self.resample_polygon(polygon)
        resampled_polygon = self.normalize_polygon(resampled_polygon)

        fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree)
        fourier_coeff = self.clockwise(fourier_coeff, fourier_degree)

        real_part = np.real(fourier_coeff).reshape((-1, 1))
        image_part = np.imag(fourier_coeff).reshape((-1, 1))
        fourier_signature = np.hstack([real_part, image_part])

        return fourier_signature

    def generate_fourier_maps(self, img_size, text_polys):
        """Generate Fourier coefficient maps.

        Args:
            img_size (tuple): The image size of (height, width).
            text_polys (list[ndarray]): The list of text polygons.

        Returns:
            fourier_real_map (ndarray): The Fourier coefficient real part maps.
            fourier_image_map (ndarray): The Fourier coefficient image part
                maps.
        """
        assert isinstance(img_size, tuple)

        h, w = img_size
        k = self.fourier_degree
        real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)
        imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)

        for poly in text_polys:
            mask = np.zeros((h, w), dtype=np.uint8)
            polygon = np.array(poly).reshape((1, -1, 2))
            cv2.fillPoly(mask, polygon.astype(np.int32), 1)
            fourier_coeff = self.cal_fourier_signature(polygon[0], k)
            for i in range(-k, k + 1):
                if i != 0:
                    # Overwrite the coefficient inside the polygon only.
                    real_map[i + k, :, :] = mask * fourier_coeff[i + k, 0] + (
                        1 - mask) * real_map[i + k, :, :]
                    imag_map[i + k, :, :] = mask * fourier_coeff[i + k, 1] + (
                        1 - mask) * imag_map[i + k, :, :]
                else:
                    # Degree 0 is stored as an offset from each pixel's own
                    # (x, y) position rather than as an absolute value.
                    yx = np.argwhere(mask > 0.5)
                    k_ind = np.ones((len(yx)), dtype=np.int64) * k
                    y, x = yx[:, 0], yx[:, 1]
                    real_map[k_ind, y, x] = fourier_coeff[k, 0] - x
                    imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y

        return real_map, imag_map

    def generate_text_region_mask(self, img_size, text_polys):
        """Generate text region mask.

        Args:
            img_size (tuple): The image size (height, width).
            text_polys (list[ndarray]): The list of text polygons.

        Returns:
            text_region_mask (ndarray): The text region mask.
        """
        assert isinstance(img_size, tuple)

        h, w = img_size
        text_region_mask = np.zeros((h, w), dtype=np.uint8)

        for poly in text_polys:
            polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
            cv2.fillPoly(text_region_mask, polygon, 1)

        return text_region_mask

    def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
        """Generate effective mask by setting the ineffective regions to 0 and
        effective regions to 1.

        Args:
            mask_size (tuple): The mask size.
            polygons_ignore (list[ndarray]): The list of ignored text
                polygons.

        Returns:
            mask (ndarray): The effective mask of (height, width).
        """
        mask = np.ones(mask_size, dtype=np.uint8)

        for poly in polygons_ignore:
            instance = poly.reshape(-1, 2).astype(np.int32).reshape(1, -1, 2)
            cv2.fillPoly(mask, instance, 0)

        return mask

    def generate_level_targets(self, img_size, text_polys, ignore_polys):
        """Generate ground truth target on each level.

        Args:
            img_size (list[int]): Shape of input image as (height, width).
            text_polys (list[ndarray]): A list of ground truth polygons.
            ignore_polys (list[ndarray]): A list of ignored polygons.

        Returns:
            level_maps (list(ndarray)): A list of ground target on each level.
                Each entry concatenates, along the first axis, the text region
                mask, the center region mask, the effective mask, the Fourier
                real maps and the Fourier imaginary maps.
        """
        h, w = img_size
        lv_size_divs = self.level_size_divisors
        lv_proportion_range = self.level_proportion_range
        lv_text_polys = [[] for i in range(len(lv_size_divs))]
        lv_ignore_polys = [[] for i in range(len(lv_size_divs))]
        level_maps = []

        # Assign each polygon to every level whose (open) proportion range
        # contains the ratio of its longer bounding-box side to image height.
        for poly in text_polys:
            polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
            _, _, box_w, box_h = cv2.boundingRect(polygon)
            proportion = max(box_h, box_w) / (h + 1e-8)

            for ind, proportion_range in enumerate(lv_proportion_range):
                if proportion_range[0] < proportion < proportion_range[1]:
                    lv_text_polys[ind].append(poly / lv_size_divs[ind])

        for ignore_poly in ignore_polys:
            polygon = np.array(ignore_poly, dtype=np.int32).reshape((1, -1, 2))
            _, _, box_w, box_h = cv2.boundingRect(polygon)
            proportion = max(box_h, box_w) / (h + 1e-8)

            for ind, proportion_range in enumerate(lv_proportion_range):
                if proportion_range[0] < proportion < proportion_range[1]:
                    lv_ignore_polys[ind].append(ignore_poly /
                                                lv_size_divs[ind])

        for ind, size_divisor in enumerate(lv_size_divs):
            current_level_maps = []
            level_img_size = (h // size_divisor, w // size_divisor)

            text_region = self.generate_text_region_mask(
                level_img_size, lv_text_polys[ind])[None]
            current_level_maps.append(text_region)

            center_region = self.generate_center_region_mask(
                level_img_size, lv_text_polys[ind])[None]
            current_level_maps.append(center_region)

            effective_mask = self.generate_effective_mask(
                level_img_size, lv_ignore_polys[ind])[None]
            current_level_maps.append(effective_mask)

            fourier_real_map, fourier_image_maps = self.generate_fourier_maps(
                level_img_size, lv_text_polys[ind])
            current_level_maps.append(fourier_real_map)
            current_level_maps.append(fourier_image_maps)

            level_maps.append(np.concatenate(current_level_maps))

        return level_maps

    @staticmethod
    def _split_by_ignore_tag(polygons, ignore_tags):
        """Split polygons into (kept, ignored) lists according to their tags.

        A truthiness test is used instead of ``tag is True`` because the
        elements of a NumPy boolean array are ``numpy.bool_`` objects, which
        are never identical to the Python ``True`` singleton.
        """
        kept = []
        ignored = []
        for tag, polygon in zip(ignore_tags, polygons):
            if tag:
                ignored.append(polygon)
            else:
                kept.append(polygon)
        return kept, ignored

    def generate_targets(self, results):
        """Generate the ground truth targets for FCENet.

        Args:
            results (dict): The input result dictionary. The keys 'image',
                'polys' and 'ignore_tags' are read.

        Returns:
            results (dict): The same dictionary with 'p3_maps', 'p4_maps' and
                'p5_maps' added (the first three level maps).
        """
        assert isinstance(results, dict)
        image = results['image']
        polygons = results['polys']
        ignore_tags = results['ignore_tags']
        h, w, _ = image.shape

        polygon_masks, polygon_masks_ignore = self._split_by_ignore_tag(
            polygons, ignore_tags)

        level_maps = self.generate_level_targets((h, w), polygon_masks,
                                                 polygon_masks_ignore)

        mapping = {
            'p3_maps': level_maps[0],
            'p4_maps': level_maps[1],
            'p5_maps': level_maps[2]
        }
        for key, value in mapping.items():
            results[key] = value

        return results

    def __call__(self, results):
        """Run ``generate_targets`` on ``results`` and return the outcome."""
        results = self.generate_targets(results)
        return results