# jebin2's picture
# made thin border
# e05ccab
# raw
# history blame
# 4.8 kB
from typing import List, Tuple
from pathlib import Path
from .config import Config
import numpy as np
import cv2
class ImageProcessor:
    """Handles image preprocessing operations.

    Each public method loads an image from disk, transforms it, writes the
    result into ``config.output_folder`` and returns the written path(s).
    """

    def __init__(self, config: "Config"):
        self.config = config

    def _output_path(self, filename: str) -> str:
        """Return the path of *filename* inside the configured output folder."""
        return f'{self.config.output_folder}/{filename}'

    @staticmethod
    def _write_image(path: str, image: np.ndarray) -> None:
        """Write *image* to *path*, raising instead of failing silently.

        Raises:
            OSError: If ``cv2.imwrite`` reports that nothing was written.
        """
        # cv2.imwrite signals failure through its boolean return value; without
        # this check a failed write would still be reported as "saved".
        if not cv2.imwrite(path, image):
            raise OSError(f"Could not write image: {path}")

    def mask_text_regions(self, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
        """Mask text regions in the image to reduce panel extraction noise.

        Args:
            bboxes: Boxes given as ``[x1, y1, x2, y2]`` corner coordinates.
            output_filename: File name of the masked image inside the
                output folder.
            color: Fill colour handed to ``cv2.rectangle``.

        Returns:
            Path of the written, text-masked image.

        Raises:
            FileNotFoundError: If ``config.input_path`` cannot be loaded.
            OSError: If the masked image cannot be written.
        """
        image = cv2.imread(self.config.input_path)
        if image is None:
            raise FileNotFoundError(f"Could not load image: {self.config.input_path}")

        for bbox in bboxes:
            # cv2.rectangle only accepts integer points; coerce so that float
            # or NumPy-scalar coordinates from a detector are accepted too.
            x1, y1, x2, y2 = (int(value) for value in bbox)
            cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)

        output_path = self._output_path(output_filename)
        self._write_image(output_path, image)
        print(f"✅ Text-masked image saved to: {output_path}")
        return output_path

    def preprocess_image(self, masked_image_path: str) -> Tuple[str, str, str]:
        """Preprocess image for panel extraction.

        Produces a grayscale image, an inverted binary image (threshold 200)
        and a dilated version of the binary image, saving all three.

        Args:
            masked_image_path: Path of the image to preprocess.

        Returns:
            Paths of the grayscale, binary and dilated images, in that order.

        Raises:
            FileNotFoundError: If *masked_image_path* cannot be loaded.
            OSError: If one of the intermediate images cannot be written.
        """
        image = cv2.imread(masked_image_path)
        if image is None:
            raise FileNotFoundError(f"Could not load image: {masked_image_path}")

        # Convert to grayscale and binary
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

        # Dilate to strengthen borders
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        dilated = cv2.dilate(binary, kernel, iterations=2)

        # Save intermediate results
        gray_path = self._output_path('2_gray.jpg')
        binary_path = self._output_path('3_binary.jpg')
        dilated_path = self._output_path('4_dilated.jpg')
        self._write_image(gray_path, gray)
        self._write_image(binary_path, binary)
        self._write_image(dilated_path, dilated)
        return gray_path, binary_path, dilated_path

    def thin_image_borders(self, processed_image_path: str, output_filename: str = "5_thin_border.jpg", min_size: int = 150) -> str:
        """Thin thick borders to a skeleton and remove hanging clusters.

        Args:
            processed_image_path: Path of the image to thin; it is loaded
                as grayscale.
            output_filename: File name of the result inside the output folder.
            min_size: Size threshold passed to ``remove_small_objects``;
                raise it to prune more aggressively, lower it to keep more.

        Returns:
            Path of the written image.

        Raises:
            FileNotFoundError: If *processed_image_path* cannot be loaded.
            OSError: If the result cannot be written.
        """
        # Imported lazily so scikit-image is only required by this method.
        from skimage.morphology import skeletonize, remove_small_objects
        from skimage.measure import label

        img = cv2.imread(processed_image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f"Could not load image: {processed_image_path}")

        # Inverted threshold: pixels at or below 128 become 1, the rest 0.
        _, binary = cv2.threshold(img, 128, 1, cv2.THRESH_BINARY_INV)

        # Skeletonize
        skeleton = skeletonize(binary).astype(np.uint8)

        # Remove small hanging clusters
        labeled = label(skeleton, connectivity=2)
        cleaned = remove_small_objects(labeled, min_size=min_size)

        # Convert back to a 0-255 uint8 image and undo the inversion
        final = (cleaned > 0).astype(np.uint8) * 255
        result = 255 - final

        output_path = self._output_path(output_filename)
        self._write_image(output_path, result)
        print(f"✅ Cleaned and thinned image saved to: {output_path}")
        return output_path

    @staticmethod
    def _clear_crowded_pixels(binary: np.ndarray, max_neighbors: int) -> np.ndarray:
        """Return a copy of a 0/1 mask with selected crowded pixels cleared.

        A pixel is "crowded" when it is set and more than *max_neighbors* of
        its 8 neighbours are set. A crowded pixel is cleared only when its own
        row holds the fewest set pixels among the rows directly above, at and
        below it that exist; on a tie the upper row wins. Every decision is
        taken from the input mask, so the result does not depend on the order
        in which pixels are visited.
        """
        height, width = binary.shape
        padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0).astype(np.int32)

        # Sum of each 3x3 window (centre included) built from 9 shifted views.
        window_sum = np.zeros((height, width), dtype=np.int32)
        for dy in range(3):
            for dx in range(3):
                window_sum += padded[dy:dy + height, dx:dx + width]
        crowded = (binary == 1) & (window_sum - 1 > max_neighbors)

        # A row is selected when it is strictly sparser than the row above
        # and at least as sparse as the row below (missing rows are ignored).
        row_counts = binary.sum(axis=1, dtype=np.int64)
        row_is_sparsest = np.ones(height, dtype=bool)
        row_is_sparsest[1:] &= row_counts[1:] < row_counts[:-1]
        row_is_sparsest[:-1] &= row_counts[:-1] <= row_counts[1:]

        cleaned = binary.copy()
        cleaned[crowded & row_is_sparsest[:, np.newaxis]] = 0
        return cleaned

    def clean_dilated_image(self, dilated_path: str,
                            output_filename: str = "6_dilated_cleaned.jpg",
                            max_neighbors: int = 2) -> str:
        """Clean dilated image by thinning thick borders.

        Pixels equal to 0 in the loaded image are treated as border pixels.

        Args:
            dilated_path: Path of the image to clean; it is loaded as grayscale.
            output_filename: File name of the result inside the output folder.
            max_neighbors: A border pixel with more set neighbours than this
                is a candidate for removal.

        Returns:
            Path of the written image, in which kept border pixels are 0 and
            everything else is 255.

        Raises:
            FileNotFoundError: If *dilated_path* cannot be loaded.
            OSError: If the result cannot be written.
        """
        dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
        if dilated is None:
            raise FileNotFoundError(f"Could not load dilated image: {dilated_path}")

        binary = (dilated == 0).astype(np.uint8)
        cleaned = self._clear_crowded_pixels(binary, max_neighbors)
        cleaned_img = (1 - cleaned) * 255

        output_path = self._output_path(output_filename)
        self._write_image(output_path, cleaned_img)
        print(f"✅ Cleaned dilated image saved to: {output_path}")
        return output_path