Spaces:
Runtime error
Runtime error
Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from reading_order import GraphBasedOrdering
|
| 2 |
+
from shapely.geometry import Polygon
|
| 3 |
+
import glob
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
from shapely.validation import make_valid
|
| 7 |
+
|
| 8 |
+
# Shared ordering helper created once at import time; order_regions_lines calls
# its .order() method on bounding-box lists for both lines and regions.
order_poly = GraphBasedOrdering()
|
| 9 |
+
|
| 10 |
+
def load_image_paths(input_folder, extensions=None):
    """
    Load image files from a folder.

    Extension matching is case-insensitive, so 'jpg' also matches '.JPG' and
    '.Jpg'. Each path is returned at most once. Hidden entries (names starting
    with a dot) are skipped, as with a plain '*' glob pattern.

    Args:
        input_folder: Path to the folder containing images
        extensions: List of file extensions (without the leading dot) to
            include (default: common image formats)

    Returns:
        Sorted list of image file paths
    """
    if extensions is None:
        extensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'tif', 'webp']

    # str.endswith accepts a tuple, so a single lowercase comparison covers
    # every requested extension in any letter case.
    suffixes = tuple(f'.{ext.lower()}' for ext in extensions)

    # Escape the folder so that characters such as '[' or '*' in its name are
    # taken literally instead of being interpreted as glob patterns.
    pattern = os.path.join(glob.escape(os.fspath(input_folder)), '*')

    # The folder is listed once and filtered, so a file can never be reported
    # twice (which happened when the lower- and upper-case patterns coincided).
    return sorted(
        path for path in glob.glob(pattern)
        if os.path.basename(path).lower().endswith(suffixes)
    )
|
| 31 |
+
|
| 32 |
+
def get_default_region(image_shape):
    """Build a single fallback region spanning the whole image.

    Used when no regions are detected. The input is unpacked as
    (width, height); the stored 'img_shape' is (height, width).

    Args:
        image_shape: Two-item sequence unpacked as (width, height).

    Returns:
        A one-element list holding the fallback region dict with the keys
        'coords', 'max_min', 'name', 'id' and 'img_shape'.
    """
    width, height = image_shape
    # Rectangle corners starting from the origin: (0, 0) -> (w, 0) -> (w, h) -> (0, h).
    corners = [[0.0, 0.0], [width, 0.0], [width, height], [0.0, height]]
    fallback = dict(
        coords=corners,
        max_min=[width, 0.0, height, 0.0],
        name="paragraph",
        id='0',
        img_shape=(height, width),
    )
    return [fallback]
|
| 41 |
+
|
| 42 |
+
def get_dist(line_polygon, regions):
    """Find the region closest to a text line.

    Args:
        line_polygon: Line polygon coordinates, e.g. [[x1, y1], ..., [xn, yn]].
        regions: Iterable of region dicts, each providing 'coords' (polygon
            coordinates) and 'id'.

    Returns:
        The 'id' of the region with the smallest distance to the line, or
        None when the line or every region fails polygon validation.
    """
    closest_id = None
    line_polygon = validate_polygon(line_polygon)
    if not line_polygon:
        return closest_id

    # Start from infinity rather than a fixed large number, so that a line is
    # still assigned to its nearest region however far away that region is.
    smallest = float('inf')
    for region in regions:
        region_polygon = validate_polygon(region['coords'])
        if not region_polygon:
            continue
        # Distance between the line and region geometries
        distance = line_polygon.distance(region_polygon)
        # Strict comparison keeps the first region when distances tie.
        if distance < smallest:
            smallest, closest_id = distance, region['id']
    return closest_id
|
| 56 |
+
|
| 57 |
+
def validate_polygon(polygon):
    """Convert polygon coordinates to a shapely geometry, repairing it if invalid.

    Args:
        polygon: Sequence of points, e.g. [[x1, y1], ..., [xn, yn]].

    Returns:
        None when fewer than three points are given. Otherwise the Polygon
        built from the points, or the result of make_valid() when that
        polygon is not valid.
    """
    # A polygon needs at least three points.
    if len(polygon) <= 2:
        return None
    shape = Polygon(polygon)
    return shape if shape.is_valid else make_valid(shape)
|
| 66 |
+
|
| 67 |
+
def get_iou(poly1, poly2):
    """Calculate the Intersection over Union (IoU) of two polygons.

    Args:
        poly1: Polygon coordinates, e.g. [[x1, y1], ..., [xn, yn]].
        poly2: Polygon coordinates in the same format.

    Returns:
        Intersection area divided by union area. Returns 0 when either
        polygon fails validation, when the polygons do not intersect, or
        when their union has zero area.
    """
    poly1 = validate_polygon(poly1)
    poly2 = validate_polygon(poly2)
    # If either polygon is unusable or they don't intersect, IoU is 0
    if not (poly1 and poly2) or not poly1.intersects(poly2):
        return 0

    intersection_area = poly1.intersection(poly2).area
    union_area = poly1.union(poly2).area
    # Two degenerate (zero-area) geometries can still intersect; their union
    # then has no area and the ratio is undefined, so report no overlap
    # instead of raising ZeroDivisionError.
    if union_area == 0:
        return 0
    return intersection_area / union_area
|
| 82 |
+
|
| 83 |
+
def get_line_regions(lines, regions):
    """Connect each text line to one region.

    A line is assigned to the region with which it has the highest IoU. If
    it overlaps no region, the closest region (by distance) is used instead.

    Args:
        lines (dict): Text line information with keys:
            - 'coords' (list): Line polygons, each e.g. [[x1,y1], ..., [xn,yn]]
            - 'max_min' (list): Bounding box values for each line
        regions (list): Region dictionaries, each containing:
            - 'coords': Region polygon coordinates. E.g. [[x1,y1], ..., [xn,yn]]
            - 'id': Region identifier

    Returns:
        list: One dictionary per line, in input order, with keys:
            - 'polygon': Line polygon coordinates
            - 'reg_id': ID of the region the line belongs to
            - 'max_min': The entry of lines['max_min'] at the same index, or
              [0.0, 0.0, 0.0, 0.0] when there is no such entry
    """
    assigned = []
    for idx, line_coords in enumerate(lines['coords']):
        best_iou, best_region = 0, ''
        for candidate in regions:
            overlap = get_iou(line_coords, candidate['coords'])
            # Strict comparison: the first region wins when overlaps tie.
            if overlap > best_iou:
                best_iou, best_region = overlap, candidate['id']

        # No overlap with any region: fall back to the nearest region.
        if best_iou == 0:
            best_region = get_dist(line_coords, regions)

        # 'max_min' may hold fewer entries than 'coords'; use a zero box then.
        box = lines['max_min'][idx] if idx < len(lines['max_min']) else [0.0, 0.0, 0.0, 0.0]

        assigned.append({'polygon': line_coords, 'reg_id': best_region, 'max_min': box})
    return assigned
|
| 125 |
+
|
| 126 |
+
def order_regions_lines(lines, regions):
    """Order the lines inside each region, then order the regions themselves.

    Args:
        lines (list): Line dictionaries, each containing:
            - 'reg_id': ID of the region the line belongs to
            - 'max_min': Bounding box values for the line
            - 'polygon': Line polygon coordinates
        regions (list): Region dictionaries, each containing:
            - 'id': Region identifier
            - 'coords': Region polygon coordinates
            - 'name': Region name
            - 'max_min': Bounding box values for the region
            - 'img_shape': Shape of the source image

    Returns:
        list: Ordered region dictionaries, each containing:
            - 'region_coords': Region polygon coordinates
            - 'region_name': Region name
            - 'lines': Ordered list of line polygons within the region
            - 'img_shape': Shape of the source image

    Note: Regions with no lines attached are left out of the output. Both
    orderings come from order_poly.order() applied to the 'max_min' values.
    """
    populated = []
    populated_boxes = []
    for region in regions:
        # Collect the lines that were assigned to this region.
        members = [entry for entry in lines if entry['reg_id'] == region['id']]
        if not members:
            continue

        boxes = [entry['max_min'] for entry in members]
        polygons = [entry['polygon'] for entry in members]
        # order() yields indices into the list it was given.
        ranking = order_poly.order(boxes)
        populated.append({
            'region_coords': region['coords'],
            'region_name': region['name'],
            'lines': [polygons[pos] for pos in ranking],
            'img_shape': region['img_shape'],
        })
        populated_boxes.append(region['max_min'])

    # Order the retained regions by their bounding boxes.
    region_ranking = order_poly.order(populated_boxes)
    return [populated[pos] for pos in region_ranking]
|