Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
class ImageUtils:
    """Image and mask helpers for a word-segmentation style pipeline.

    The methods cover letterbox resizing to a square canvas, undoing that
    resize for predicted masks, simple value scaling, and extracting and
    ordering per-word crops from a label mask.
    """

    @staticmethod
    def _scaled_dims(h, w, scale):
        """Return ``(new_h, new_w)`` for an ``h`` x ``w`` image scaled by ``scale``.

        Each dimension is truncated with ``int`` and clamped to at least 1 so
        that very elongated images never collapse to a zero-sized side
        (``cv2.resize`` rejects a zero ``dsize``). ``resize`` and
        ``restore_size`` both use this helper so the crop taken when
        restoring always matches the region written when resizing.
        """
        return max(1, int(h * scale)), max(1, int(w * scale))

    def resize(self, image, target_size, is_mask=False):
        """Letterbox ``image`` onto a ``target_size`` x ``target_size`` canvas.

        The image is scaled so its longer side equals ``target_size`` (aspect
        ratio preserved) and centred on the canvas.

        Args:
            image: 2-D ``(h, w)`` or 3-D ``(h, w, c)`` array.
            target_size: Side length in pixels of the square output.
            is_mask: When true, only the first channel of a 3-D input is
                used, nearest-neighbour interpolation is applied, and the
                padding value is 0. Otherwise linear interpolation is used
                and the padding value is 255.

        Returns:
            ``(canvas, scale, top, left, (h, w))`` where ``scale`` is the
            resize factor, ``top``/``left`` are the offsets of the resized
            image inside the canvas and ``(h, w)`` is the original size.
            The mask canvas is float64 (``np.zeros`` default); the image
            canvas is uint8.

        Raises:
            ValueError: If ``target_size`` is not positive or ``image`` has a
                zero-length side.
        """
        if target_size <= 0:
            raise ValueError("target_size must be a positive integer")

        if is_mask and image.ndim == 3:
            # Masks are treated as single-channel: keep the first plane only.
            image = image[:, :, 0]

        if image.ndim == 3:
            h, w, c = image.shape
        else:
            h, w = image.shape
            c = 1

        if h == 0 or w == 0:
            raise ValueError("image must have non-zero height and width")

        scale = target_size / max(h, w)
        new_h, new_w = self._scaled_dims(h, w, scale)

        # Nearest-neighbour keeps mask labels intact (no blended values).
        interpolation = cv2.INTER_NEAREST if is_mask else cv2.INTER_LINEAR
        resized = cv2.resize(image, (new_w, new_h), interpolation=interpolation)

        canvas_shape = (
            (target_size, target_size) if c == 1 else (target_size, target_size, c)
        )
        if is_mask:
            canvas = np.zeros(canvas_shape)
        else:
            canvas = np.full(canvas_shape, 255, dtype=np.uint8)

        top = (target_size - new_h) // 2
        left = (target_size - new_w) // 2
        canvas[top:top + new_h, left:left + new_w] = resized
        return canvas, scale, top, left, (h, w)

    def to_cv2(self, image):
        """Reverse the channel order of a 3-channel image (BGR <-> RGB).

        The conversion is its own inverse, so this works in either direction.
        """
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def normalize(self, image):
        """Divide ``image`` by 255 and guarantee a trailing channel axis.

        A 2-D input ``(h, w)`` is returned as ``(h, w, 1)``; inputs of any
        other rank keep their shape.
        """
        needs_channel_axis = image.ndim == 2
        image = image / 255.0
        if needs_channel_axis:
            image = np.expand_dims(image, axis=-1)
        return image

    def darken_mask(self, mask):
        """Multiply ``mask`` by 255 and cast the result to uint8.

        NOTE(review): presumably ``mask`` holds values in [0, 1]; larger
        values wrap around in the uint8 cast -- confirm against callers.
        """
        return (mask * 255).astype(np.uint8)

    def restore_size(self, mask, scale, top, left, orig_h, orig_w):
        """Undo ``resize`` for a mask: strip the padding and rescale.

        Args:
            mask: Square mask laid out like the canvas produced by ``resize``.
            scale: Scale factor returned by ``resize``.
            top: Vertical offset returned by ``resize``.
            left: Horizontal offset returned by ``resize``.
            orig_h: Original image height.
            orig_w: Original image width.

        Returns:
            A uint8 mask of shape ``(orig_h, orig_w)`` resized with
            nearest-neighbour interpolation.
        """
        # Same dimension rule as resize(), so the crop is never empty.
        crop_h, crop_w = self._scaled_dims(orig_h, orig_w, scale)
        mask_unpadded = mask[top:top + crop_h, left:left + crop_w]
        return cv2.resize(
            mask_unpadded.astype(np.uint8),
            (orig_w, orig_h),
            interpolation=cv2.INTER_NEAREST,
        )

    def mask_image(self, image, mask):
        """Keep the pixels of ``image`` where ``mask`` is non-zero.

        ``mask`` is multiplied by 255 and cast to uint8 before being passed
        as the ``mask`` argument of ``cv2.bitwise_and``.
        NOTE(review): assumes ``mask`` is 0/1 valued -- confirm with callers.
        """
        mask = (mask * 255).astype(np.uint8)
        return cv2.bitwise_and(image, image, mask=mask)

    def extract_masks(self, image, mask):
        """Crop ``image`` around every connected region of every mask label.

        Label 0 is skipped. For each remaining label, the external contours
        of its binary mask are found and the bounding rectangle of each one
        is used to crop ``image``.

        Returns:
            A list of ``(crop, (x, y, w, h))`` tuples.
        """
        words = []
        unique_labels = np.unique(mask)
        unique_labels = unique_labels[unique_labels != 0]
        for label in unique_labels:
            binary_mask = (mask == label).astype(np.uint8)
            # findContours returns (contours, hierarchy) in OpenCV 4.x but
            # (image, contours, hierarchy) in 3.x; [-2] is contours in both.
            contours = cv2.findContours(
                binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )[-2]
            for cnt in contours:
                x, y, w, h = cv2.boundingRect(cnt)
                words.append((image[y:y + h, x:x + w], (x, y, w, h)))
        return words

    def order_words(self, words, vertical_padding_factor=1.2, min_padding_px=2,
                    height_mode='min'):
        """Sort word crops into reading order: by line, then left to right.

        Boxes are visited in order of their vertical centre. A box joins the
        current line when its centre lies within ``pad`` pixels of the median
        centre of that line; otherwise it starts a new line. Each line is
        then sorted by its left edge ``x``.

        Args:
            words: Iterable of ``(img, (x, y, w, h))`` as produced by
                ``extract_masks``.
            vertical_padding_factor: Multiplier applied to the reference box
                height to obtain the line tolerance ``pad``.
            min_padding_px: Lower bound for ``pad`` in pixels.
            height_mode: ``'median'`` uses the median box height as the
                reference; any other value uses the minimum height.

        Returns:
            ``(ordered_imgs, ordered_bboxes)``; both are empty lists when
            ``words`` is empty. Bounding boxes are tuples of plain ints.
        """
        items = [
            {'img': img, 'x': x, 'y': y, 'w': w, 'h': h,
             'cx': x + w / 2.0, 'cy': y + h / 2.0}
            for img, (x, y, w, h) in words
        ]
        if not items:
            return [], []

        heights = np.array([it['h'] for it in items])
        if height_mode == 'median':
            base_h = float(np.median(heights))
        else:
            base_h = float(np.min(heights))
        pad = max(int(round(base_h * vertical_padding_factor)), int(min_padding_px))

        items_sorted = sorted(items, key=lambda it: it['cy'])
        lines = []
        current_line = [items_sorted[0]]
        for it in items_sorted[1:]:
            current_median = np.median([p['cy'] for p in current_line])
            if abs(it['cy'] - current_median) <= pad:
                current_line.append(it)
            else:
                lines.append(current_line)
                current_line = [it]
        # current_line always holds at least one item at this point.
        lines.append(current_line)

        ordered_items = []
        for line in lines:
            line.sort(key=lambda it: it['x'])
            ordered_items.extend(line)

        ordered_imgs = [it['img'] for it in ordered_items]
        ordered_bboxes = [
            (int(it['x']), int(it['y']), int(it['w']), int(it['h']))
            for it in ordered_items
        ]
        return ordered_imgs, ordered_bboxes