Spaces:
Running
Running
File size: 4,796 Bytes
ec6ad2f e05ccab ec6ad2f e05ccab ec6ad2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
from typing import List, Tuple
from pathlib import Path
from .config import Config
import numpy as np
import cv2
class ImageProcessor:
    """Image preprocessing steps used ahead of panel extraction.

    Each public method reads an image from disk, transforms it and writes the
    result into ``config.output_folder``, returning the written path(s) as
    strings so the next step can pick them up.
    """

    def __init__(self, config: "Config"):
        # ``config`` must expose ``input_path`` and ``output_folder``; those
        # are the only attributes this class reads.
        self.config = config

    @staticmethod
    def _read_image(path: str, grayscale: bool = False,
                    what: str = "image") -> np.ndarray:
        """Load *path* with OpenCV or raise ``FileNotFoundError``.

        Args:
            path: File to read.
            grayscale: Read as a single-channel image when true; otherwise use
                OpenCV's default colour mode.
            what: Noun used in the error message.
        """
        if grayscale:
            image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        else:
            image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not load {what}: {path}")
        return image

    def _write_image(self, filename: str, image: np.ndarray) -> str:
        """Write *image* into the output folder and return the path.

        Raises:
            OSError: If OpenCV reports that the file was not written.
        """
        output_path = f'{self.config.output_folder}/{filename}'
        # cv2.imwrite returns False on failure instead of raising; without
        # this check a failed write would be reported as a success.
        if not cv2.imwrite(output_path, image):
            raise OSError(f"Could not write image: {output_path}")
        return output_path

    @staticmethod
    def _prune_crowded_pixels(binary, max_neighbors: int = 2) -> np.ndarray:
        """Clear crowded set pixels that lie on a locally sparsest row.

        A set pixel (value 1) is cleared when both hold:

        * more than *max_neighbors* of its 8 neighbours are set, and
        * its row has the fewest set pixels among itself and the rows directly
          above and below (on a tie the upper row wins).

        All decisions are taken against the input mask, never against the
        partially cleaned output, so the result does not depend on scan order.

        Args:
            binary: 2-D array-like of 0/1 values.
            max_neighbors: Highest neighbour count a pixel may have and still
                be kept.

        Returns:
            A new ``uint8`` array of the same shape; the input is not modified.
        """
        mask = np.asarray(binary, dtype=np.uint8)
        height, width = mask.shape

        # Sum the 3x3 window around every pixel by adding the nine shifted
        # views of the zero-padded mask, then drop the centre pixel itself.
        padded = np.pad(mask, pad_width=1, mode="constant",
                        constant_values=0).astype(np.int32)
        window = np.zeros((height, width), dtype=np.int32)
        for dy in range(3):
            for dx in range(3):
                window += padded[dy:dy + height, dx:dx + width]
        neighbors = window - mask

        # A row is selected when it is strictly sparser than the row above
        # and no denser than the row below. Rows outside the image never
        # compete, so the first/last row only has one comparison to pass.
        row_counts = mask.sum(axis=1, dtype=np.int64)
        sparser_than_prev = np.ones(height, dtype=bool)
        sparser_than_prev[1:] = row_counts[1:] < row_counts[:-1]
        not_denser_than_next = np.ones(height, dtype=bool)
        not_denser_than_next[:-1] = row_counts[:-1] <= row_counts[1:]
        row_selected = sparser_than_prev & not_denser_than_next

        to_clear = (
            (mask == 1)
            & (neighbors > max_neighbors)
            & row_selected[:, np.newaxis]
        )
        cleaned = mask.copy()
        cleaned[to_clear] = 0
        return cleaned

    def mask_text_regions(self, bboxes: List[List[int]],
                          output_filename: str = "1_text_removed.jpg",
                          color: Tuple[int, int, int] = (0, 0, 0)) -> str:
        """Mask text regions in the image to reduce panel extraction noise.

        Args:
            bboxes: Boxes as ``[x1, y1, x2, y2]`` corner coordinates.
            output_filename: File name to write inside the output folder.
            color: Fill colour passed to ``cv2.rectangle``.

        Returns:
            Path of the masked image.

        Raises:
            FileNotFoundError: If ``config.input_path`` cannot be read.
            OSError: If the result cannot be written.
        """
        image = self._read_image(self.config.input_path)
        for bbox in bboxes:
            # cv2.rectangle only accepts integer points; cast so coordinates
            # that arrive as floats do not fail.
            x1, y1, x2, y2 = (int(value) for value in bbox)
            # thickness=-1 draws a filled rectangle.
            cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)
        output_path = self._write_image(output_filename, image)
        print(f"✅ Text-masked image saved to: {output_path}")
        return str(output_path)

    def preprocess_image(self, masked_image_path: str) -> Tuple[str, str, str]:
        """Preprocess image for panel extraction.

        Converts the image to grayscale, applies an inverted binary threshold
        at 200 (pixels above 200 become 0, all others 255) and dilates the
        result twice with a 5x5 rectangular kernel to strengthen borders.

        Returns:
            Paths of the grayscale, binary and dilated images, in that order.

        Raises:
            FileNotFoundError: If *masked_image_path* cannot be read.
            OSError: If any intermediate image cannot be written.
        """
        image = self._read_image(masked_image_path)

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        dilated = cv2.dilate(binary, kernel, iterations=2)

        # Intermediate results are saved in pipeline order.
        gray_path = self._write_image('2_gray.jpg', gray)
        binary_path = self._write_image('3_binary.jpg', binary)
        dilated_path = self._write_image('4_dilated.jpg', dilated)
        return str(gray_path), str(binary_path), str(dilated_path)

    def thin_image_borders(self, processed_image_path: str,
                           output_filename: str = "5_thin_border.jpg",
                           min_size: int = 150) -> str:
        """Thin thick borders to a skeleton and drop small hanging clusters.

        Args:
            processed_image_path: Grayscale-readable image to thin.
            output_filename: File name to write inside the output folder.
            min_size: Passed to ``remove_small_objects``; raise it to prune
                more aggressively, lower it to keep smaller fragments.

        Returns:
            Path of the thinned image (skeleton drawn as 0 on a 255
            background).

        Raises:
            FileNotFoundError: If *processed_image_path* cannot be read.
            OSError: If the result cannot be written.
        """
        # Imported lazily so scikit-image is only required by this step.
        from skimage.morphology import skeletonize, remove_small_objects
        from skimage.measure import label

        img = self._read_image(processed_image_path, grayscale=True)
        # Inverted threshold with maxval 1: pixels at or below 128 become 1,
        # giving the 0/1 foreground mask that skeletonize works on.
        _, binary = cv2.threshold(img, 128, 1, cv2.THRESH_BINARY_INV)

        skeleton = skeletonize(binary).astype(np.uint8)

        # Label connected components (connectivity=2 includes diagonals) so
        # that small ones can be removed as whole objects.
        labeled = label(skeleton, connectivity=2)
        cleaned = remove_small_objects(labeled, min_size=min_size)

        # Back to a 0/255 uint8 image, then invert so the kept skeleton is
        # dark on a light background again.
        final = (cleaned > 0).astype(np.uint8) * 255
        result = 255 - final

        output_path = self._write_image(output_filename, result)
        print(f"✅ Cleaned and thinned image saved to: {output_path}")
        return str(output_path)

    def clean_dilated_image(self, dilated_path: str,
                            output_filename: str = "6_dilated_cleaned.jpg",
                            max_neighbors: int = 2) -> str:
        """Clean dilated image by thinning thick borders.

        Pixels that are exactly 0 in the input form the working mask. A mask
        pixel is dropped when it has more than *max_neighbors* set neighbours
        and sits on the sparsest of its three adjacent rows (see
        ``_prune_crowded_pixels``). The surviving mask is written back as 0 on
        a 255 background.

        Returns:
            Path of the cleaned image.

        Raises:
            FileNotFoundError: If *dilated_path* cannot be read.
            OSError: If the result cannot be written.
        """
        dilated = self._read_image(dilated_path, grayscale=True,
                                   what="dilated image")
        # NOTE(review): the comparison is against exactly 0. If the input was
        # saved as lossy JPEG, pixels near edges may not be exactly 0 —
        # confirm whether a tolerance is wanted here.
        binary = (dilated == 0).astype(np.uint8)

        cleaned = self._prune_crowded_pixels(binary, max_neighbors)

        # Map mask value 1 -> 0 and 0 -> 255 for the output image.
        cleaned_img = (1 - cleaned) * 255
        output_path = self._write_image(output_filename, cleaned_img)
        print(f"✅ Cleaned dilated image saved to: {output_path}")
        return str(output_path)