Spaces:
Running
Running
File size: 7,018 Bytes
ec6ad2f f0c23ec ec6ad2f f0c23ec ec6ad2f f0c23ec ec6ad2f f0c23ec ec6ad2f f0c23ec ec6ad2f f0c23ec ec6ad2f f0c23ec ec6ad2f 76634d5 ec6ad2f 359e487 e05ccab 76634d5 8b56694 e05ccab ec6ad2f e05ccab ec6ad2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
from typing import List, Tuple
from pathlib import Path
from .config import Config
import numpy as np
import cv2
class ImageProcessor:
    """Image preprocessing steps used ahead of panel extraction.

    Each public method loads an image from disk with OpenCV, transforms it,
    writes the result to ``<config.output_folder>/<filename>`` and returns
    the path(s) of what it wrote.  The only attribute read from ``config``
    is ``output_folder``.
    """

    def __init__(self, config: "Config"):
        self.config = config

    # ------------------------------------------------------------------
    # Private helpers shared by the public steps
    # ------------------------------------------------------------------
    def _output_path(self, filename: str) -> str:
        """Return the path of *filename* inside the configured output folder."""
        return f'{self.config.output_folder}/{filename}'

    @staticmethod
    def _read_image(path, grayscale: bool = False, label: str = "image"):
        """Load *path* with OpenCV, raising ``FileNotFoundError`` on failure.

        ``cv2.imread`` signals failure by returning ``None`` rather than by
        raising, so the check has to be done explicitly.
        """
        # str() lets callers pass pathlib.Path objects as well as strings.
        if grayscale:
            image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
        else:
            image = cv2.imread(str(path))
        if image is None:
            raise FileNotFoundError(f"Could not load {label}: {path}")
        return image

    @staticmethod
    def _write_image(path: str, image) -> None:
        """Write *image* to *path*, raising ``OSError`` if OpenCV reports failure.

        ``cv2.imwrite`` returns ``False`` when nothing was written; without
        this check a failed write would still be reported as a success.
        """
        if not cv2.imwrite(str(path), image):
            raise OSError(f"Could not write image: {path}")

    @staticmethod
    def _thin_binary_mask(binary: np.ndarray, max_neighbors: int) -> np.ndarray:
        """Clear crowded pixels of a 0/1 mask that lie on a locally sparsest row.

        A set pixel is cleared when both conditions hold:

        * it has more than *max_neighbors* set pixels among its 8 neighbours
          (pixels outside the image count as unset), and
        * its row has strictly fewer set pixels than the row above and no
          more than the row below (a missing row never blocks the condition).

        Every decision is taken on the unmodified input, so the result does
        not depend on the order in which pixels are visited.

        Args:
            binary: 2-D array containing only 0 and 1.
            max_neighbors: Neighbour count above which a pixel is "crowded".

        Returns:
            A copy of *binary* with the selected pixels set to 0.
        """
        height, width = binary.shape

        # Sum of each 3x3 window, built from nine shifted views of the
        # zero-padded mask; int64 avoids uint8 overflow concerns.
        padded = np.pad(binary.astype(np.int64), pad_width=1,
                        mode="constant", constant_values=0)
        window_sum = np.zeros((height, width), dtype=np.int64)
        for dy in range(3):
            for dx in range(3):
                window_sum += padded[dy:dy + height, dx:dx + width]
        # The window includes the pixel itself, hence the "- 1".
        crowded = (binary == 1) & (window_sum - 1 > max_neighbors)

        # A row "wins" among (above, itself, below) when it has the fewest
        # set pixels; ties go to the upper row.  The sentinel exceeds any
        # possible row count so missing rows never win.
        row_counts = binary.sum(axis=1, dtype=np.int64)
        sentinel = width + 1
        above = np.concatenate(([sentinel], row_counts[:-1]))
        below = np.concatenate((row_counts[1:], [sentinel]))
        row_is_sparsest = (row_counts < above) & (row_counts <= below)

        cleaned = binary.copy()
        cleaned[crowded & row_is_sparsest[:, np.newaxis]] = 0
        return cleaned

    # ------------------------------------------------------------------
    # Public preprocessing steps
    # ------------------------------------------------------------------
    def mask_text_regions(self, input_path, bboxes: List[List[int]],
                          output_filename: str = "1_text_removed.jpg",
                          color: Tuple[int, int, int] = (0, 0, 0)) -> str:
        """Mask text regions in the image to reduce panel extraction noise.

        Args:
            input_path: Path of the image to load.
            bboxes: Boxes given as ``[x1, y1, x2, y2]`` pixel coordinates.
            output_filename: File name to write inside the output folder.
            color: Fill colour, in the channel order OpenCV loaded (BGR).

        Returns:
            Path of the written image.

        Raises:
            FileNotFoundError: If the input image cannot be loaded.
            OSError: If the result cannot be written.
        """
        image = self._read_image(input_path)

        for bbox in bboxes:
            # cv2.rectangle only accepts integer coordinates.
            x1, y1, x2, y2 = (int(value) for value in bbox)
            cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)

        output_path = self._output_path(output_filename)
        self._write_image(output_path, image)
        print(f"✅ Text-masked image saved to: {output_path}")
        return str(output_path)

    def preprocess_image(self, processed_image_path) -> Tuple[str, str, str, bool]:
        """Preprocess image for panel extraction.

        Produces a grayscale image, an inverted binary image (pixels brighter
        than 200 become 0, everything else 255) and a dilated version of the
        binary image, and saves all three.

        Returns:
            ``(gray_path, binary_path, dilated_path, is_inverted)``.
            ``is_inverted`` is currently always ``False`` because the
            black/white dominance check is not wired in.

        Raises:
            FileNotFoundError: If the input image cannot be loaded.
            OSError: If any result cannot be written.
        """
        image = self._read_image(processed_image_path)

        # Convert to grayscale and binary
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

        # Inversion detection is disabled; to enable it, replace the next
        # line with a call to self.invert_if_black_dominates(binary).
        is_inverted = False

        if is_inverted:
            dilated = binary
        else:
            # Dilate to strengthen borders
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            dilated = cv2.dilate(binary, kernel, iterations=2)

        # Save intermediate results
        gray_path = self._output_path('2_gray.jpg')
        binary_path = self._output_path('3_binary.jpg')
        dilated_path = self._output_path('4_dilated.jpg')
        self._write_image(gray_path, gray)
        self._write_image(binary_path, binary)
        self._write_image(dilated_path, dilated)

        return str(gray_path), str(binary_path), str(dilated_path), is_inverted

    def invert_if_black_dominates(self, binary) -> Tuple[np.ndarray, bool]:
        """Invert a binary image when it has more black than white pixels.

        The input is first re-thresholded at 127 so that it contains only
        0 and 255.

        Returns:
            ``(image, was_inverted)`` where ``image`` is the thresholded
            image, inverted if black pixels outnumbered white ones.
        """
        # Threshold to binary image
        _, binary = cv2.threshold(binary, 127, 255, cv2.THRESH_BINARY)

        # Count black and white pixels
        black_pixels = np.count_nonzero(binary == 0)
        white_pixels = np.count_nonzero(binary == 255)
        black_dominates = bool(black_pixels > white_pixels)

        if black_dominates:
            print("🔄 Inverting image because black > white")
            inverted = cv2.bitwise_not(binary)
        else:
            print("✅ No inversion needed, white >= black")
            inverted = binary

        return inverted, black_dominates

    def remove_inner_sketch(self, input_path,
                            output_filename="5_remove_inner_sketch.jpg",
                            min_area_ratio=0):
        """Keep only content inside sufficiently large outer contours.

        The image is inverse-thresholded at 127, its external contours are
        found, and those whose area is at least
        ``height * width * min_area_ratio`` are filled into a mask.  The
        masked binary image is then cleaned with a 5x5 morphological opening
        and inverted before saving.  With the default ratio of 0 every
        external contour is kept.

        Returns:
            Path of the written image.

        Raises:
            FileNotFoundError: If the input image cannot be loaded.
            OSError: If the result cannot be written.
        """
        img = self._read_image(input_path, grayscale=True)
        height, width = img.shape

        # Threshold image to binary
        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)

        # Find all (outermost) contours
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        # Create mask for large contours (likely panel borders)
        min_area = height * width * min_area_ratio
        mask = np.zeros_like(binary)
        for cnt in contours:
            if cv2.contourArea(cnt) >= min_area:
                cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)

        # Apply mask to the binary image (keeps only what lies inside the
        # retained contours)
        cleaned = cv2.bitwise_and(binary, binary, mask=mask)

        # Morphological opening removes tiny sketch lines
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel)

        # Undo the inversion introduced by THRESH_BINARY_INV
        cleaned = cv2.bitwise_not(cleaned)

        output_path = self._output_path(output_filename)
        self._write_image(output_path, cleaned)
        print(f"✅ Remove Inner Sketch image saved to: {output_path}")
        return str(output_path)

    def thin_image_borders(self, processed_image_path: str,
                           output_filename: str = "6_thin_border.jpg",
                           min_size: int = 150) -> str:
        """Thin dark borders to a skeleton and drop small leftover clusters.

        Pixels with value <= 128 are treated as foreground, skeletonized with
        scikit-image, and 8-connected skeleton components smaller than
        *min_size* pixels are removed.  The result is saved as dark lines on
        a white background.

        Args:
            processed_image_path: Path of the image to load.
            output_filename: File name to write inside the output folder.
            min_size: Passed to ``remove_small_objects``; raise it to prune
                more, lower it to prune less.

        Returns:
            Path of the written image.

        Raises:
            FileNotFoundError: If the input image cannot be loaded.
            OSError: If the result cannot be written.
        """
        # Imported lazily so scikit-image is only required for this step.
        from skimage.morphology import skeletonize, remove_small_objects
        from skimage.measure import label

        img = self._read_image(processed_image_path, grayscale=True)
        # Invert into a 0/1 mask: dark pixels become 1.
        _, binary = cv2.threshold(img, 128, 1, cv2.THRESH_BINARY_INV)

        # Skeletonize
        skeleton = skeletonize(binary).astype(np.uint8)

        # Remove small hanging clusters
        labeled = label(skeleton, connectivity=2)
        cleaned = remove_small_objects(labeled, min_size=min_size)

        # Convert back to a 0–255 uint8 image, then restore dark-on-white
        final = (cleaned > 0).astype(np.uint8) * 255
        result = 255 - final

        output_path = self._output_path(output_filename)
        self._write_image(output_path, result)
        print(f"✅ Cleaned and thinned image saved to: {output_path}")
        return str(output_path)

    def clean_dilated_image(self, dilated_path: str,
                            output_filename: str = "6_dilated_cleaned.jpg",
                            max_neighbors: int = 2) -> str:
        """Clean dilated image by thinning thick borders.

        Pixels that are exactly 0 are treated as border pixels.  A border
        pixel is erased when it has more than *max_neighbors* border pixels
        around it and its row is the sparsest of the rows directly above, at
        and below it (see ``_thin_binary_mask``).  The result is saved with
        border pixels as 0 and everything else as 255.

        NOTE(review): only exact zeros count as border; if the input was
        saved as JPEG, compression noise may turn some border pixels into
        small non-zero values - confirm this is intended.

        Returns:
            Path of the written image.

        Raises:
            FileNotFoundError: If the dilated image cannot be loaded.
            OSError: If the result cannot be written.
        """
        dilated = self._read_image(dilated_path, grayscale=True,
                                   label="dilated image")

        binary = (dilated == 0).astype(np.uint8)
        cleaned = self._thin_binary_mask(binary, max_neighbors)

        # Back to an image: mask value 1 -> black (0), 0 -> white (255).
        cleaned_img = ((1 - cleaned) * 255).astype(np.uint8)

        output_path = self._output_path(output_filename)
        self._write_image(output_path, cleaned_img)
        print(f"✅ Cleaned dilated image saved to: {output_path}")
        return str(output_path)