Spaces:
Running
Running
| from typing import List | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageDraw | |
| from surya.postprocessing.util import get_line_angle, rescale_bbox | |
| from surya.schema import ColumnLine | |
| def get_detected_lines_sobel(image, vertical=True): | |
| # Apply Sobel operator with a kernel size of 3 to detect vertical edges | |
| if vertical: | |
| dx = 1 | |
| dy = 0 | |
| else: | |
| dx = 0 | |
| dy = 1 | |
| sobelx = cv2.Sobel(image, cv2.CV_32F, dx, dy, ksize=3) | |
| # Absolute Sobel (to capture both edges) | |
| abs_sobelx = np.absolute(sobelx) | |
| # Convert to 8-bit image | |
| scaled_sobel = np.uint8(255 * abs_sobelx / np.max(abs_sobelx)) | |
| kernel = np.ones((20, 1), np.uint8) | |
| eroded = cv2.erode(scaled_sobel, kernel, iterations=1) | |
| scaled_sobel = cv2.dilate(eroded, kernel, iterations=3) | |
| return scaled_sobel | |
| def get_detected_lines(image, slope_tol_deg=2, vertical=False, horizontal=False) -> List[ColumnLine]: | |
| assert not (vertical and horizontal) | |
| new_image = image.astype(np.float32) * 255 # Convert to 0-255 range | |
| if vertical or horizontal: | |
| new_image = get_detected_lines_sobel(new_image, vertical) | |
| new_image = new_image.astype(np.uint8) | |
| edges = cv2.Canny(new_image, 150, 200, apertureSize=3) | |
| if vertical: | |
| max_gap = 100 | |
| min_length = 10 | |
| else: | |
| max_gap = 10 | |
| min_length = 4 | |
| lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=150, minLineLength=min_length, maxLineGap=max_gap) | |
| line_info = [] | |
| if lines is not None: | |
| for line in lines: | |
| vertical_line = False | |
| horizontal_line = False | |
| x1, y1, x2, y2 = line[0] | |
| bbox = [x1, y1, x2, y2] | |
| if x2 == x1: | |
| vertical_line = True | |
| else: | |
| line_angle = get_line_angle(x1, y1, x2, y2) | |
| if 90 - slope_tol_deg < line_angle < 90 + slope_tol_deg: | |
| vertical_line = True | |
| elif -90 - slope_tol_deg < line_angle < -90 + slope_tol_deg: | |
| vertical_line = True | |
| elif -slope_tol_deg < line_angle < slope_tol_deg: | |
| horizontal_line = True | |
| if bbox[3] < bbox[1]: | |
| bbox[1], bbox[3] = bbox[3], bbox[1] | |
| if bbox[2] < bbox[0]: | |
| bbox[0], bbox[2] = bbox[2], bbox[0] | |
| row = ColumnLine(bbox=bbox, vertical=vertical_line, horizontal=horizontal_line) | |
| line_info.append(row) | |
| if vertical: | |
| line_info = [line for line in line_info if line.vertical] | |
| if horizontal: | |
| line_info = [line for line in line_info if line.horizontal] | |
| return line_info | |
| def draw_lines_on_image(line_info: List[ColumnLine], img): | |
| draw = ImageDraw.Draw(img) | |
| for line in line_info: | |
| divisor = 20 | |
| if line.horizontal: | |
| divisor = 200 | |
| x1, y1, x2, y2 = [x // divisor * divisor for x in line.bbox] | |
| if line.vertical: | |
| draw.line((x1, y1, x2, y2), fill="red", width=3) | |
| return img | |
| def get_vertical_lines(image, processor_size, image_size, divisor=20, x_tolerance=40, y_tolerance=20) -> List[ColumnLine]: | |
| vertical_lines = get_detected_lines(image, vertical=True) | |
| for line in vertical_lines: | |
| line.rescale_bbox(processor_size, image_size) | |
| vertical_lines = sorted(vertical_lines, key=lambda x: x.bbox[0]) | |
| for line in vertical_lines: | |
| line.round_bbox(divisor) | |
| # Merge adjacent line segments together | |
| to_remove = [] | |
| for i, line in enumerate(vertical_lines): | |
| for j, line2 in enumerate(vertical_lines): | |
| if j <= i: | |
| continue | |
| if line.bbox[0] != line2.bbox[0]: | |
| continue | |
| expanded_line1 = [line.bbox[0], line.bbox[1] - y_tolerance, line.bbox[2], | |
| line.bbox[3] + y_tolerance] | |
| line1_points = set(range(int(expanded_line1[1]), int(expanded_line1[3]))) | |
| line2_points = set(range(int(line2.bbox[1]), int(line2.bbox[3]))) | |
| intersect_y = len(line1_points.intersection(line2_points)) > 0 | |
| if intersect_y: | |
| vertical_lines[j].bbox[1] = min(line.bbox[1], line2.bbox[1]) | |
| vertical_lines[j].bbox[3] = max(line.bbox[3], line2.bbox[3]) | |
| to_remove.append(i) | |
| vertical_lines = [line for i, line in enumerate(vertical_lines) if i not in to_remove] | |
| # Remove redundant segments | |
| to_remove = [] | |
| for i, line in enumerate(vertical_lines): | |
| if i in to_remove: | |
| continue | |
| for j, line2 in enumerate(vertical_lines): | |
| if j <= i or j in to_remove: | |
| continue | |
| close_in_x = abs(line.bbox[0] - line2.bbox[0]) < x_tolerance | |
| line1_points = set(range(int(line.bbox[1]), int(line.bbox[3]))) | |
| line2_points = set(range(int(line2.bbox[1]), int(line2.bbox[3]))) | |
| intersect_y = len(line1_points.intersection(line2_points)) > 0 | |
| if close_in_x and intersect_y: | |
| # Keep the longer line and extend it | |
| if len(line2_points) > len(line1_points): | |
| vertical_lines[j].bbox[1] = min(line.bbox[1], line2.bbox[1]) | |
| vertical_lines[j].bbox[3] = max(line.bbox[3], line2.bbox[3]) | |
| to_remove.append(i) | |
| else: | |
| vertical_lines[i].bbox[1] = min(line.bbox[1], line2.bbox[1]) | |
| vertical_lines[i].bbox[3] = max(line.bbox[3], line2.bbox[3]) | |
| to_remove.append(j) | |
| vertical_lines = [line for i, line in enumerate(vertical_lines) if i not in to_remove] | |
| if len(vertical_lines) > 0: | |
| # Always start with top left of page | |
| vertical_lines[0].bbox[1] = 0 | |
| return vertical_lines |