import base64
import io
from typing import List

import cv2
import numpy as np
from PIL import Image


def encode_image_to_base64(img_array: np.ndarray) -> str:
    """Convert a numpy image array to a base64-encoded PNG string.

    Floating-point arrays are multiplied by 255, clipped to [0, 255] and
    cast to uint8 before encoding (so float input is presumably expected
    to be normalised to [0, 1] -- confirm against callers). Arrays of any
    other dtype are handed to Pillow unchanged.

    Parameters:
        img_array: The image to encode.

    Returns:
        The PNG bytes, base64-encoded and decoded to a text string.
    """
    if np.issubdtype(img_array.dtype, np.floating):
        img_array = np.clip(img_array * 255, 0, 255).astype(np.uint8)

    img_pil = Image.fromarray(img_array)
    buf = io.BytesIO()
    img_pil.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def crop_to_mask_size(output, mask):
    """Crop the output image to the bounding box of the mask.

    Parameters:
        output: The image to be cropped (HxWxC array).
        mask: The binary mask (HxW array, or 1xHxW with a leading batch
            dimension, which is dropped).

    Returns:
        The slice of ``output`` covering every non-zero mask pixel, or
        ``output`` itself when the mask has no non-zero pixel.
    """
    # Drop a leading batch dimension: (1, H, W) -> (H, W).
    if mask.ndim == 3 and mask.shape[0] == 1:
        mask = mask[0]

    ys, xs = np.where(mask)
    # Both index arrays always have the same length, so one check suffices.
    if ys.size == 0:
        return output

    y0, y1 = ys.min(), ys.max()
    x0, x1 = xs.min(), xs.max()
    return output[y0:y1 + 1, x0:x1 + 1]


def apply_mask(image, mask, background_mode="remove"):
    """Apply the given mask to the image according to the background mode.

    Parameters:
        image: The input image (HxWx3 array).
        mask: The binary mask (HxW array, or 1xHxW with a leading batch
            dimension, which is dropped).
        background_mode: How to treat the image relative to the mask:
            "extreme_blur" replaces the background with a heavily
            Gaussian-blurred copy of the image; "highlight" blends the
            masked pixels 50/50 with the colour (255, 0, 0) and leaves the
            background untouched; "remove" (and any unrecognised value)
            replaces the background with white.

    Returns:
        Processed image (HxWx3 uint8 array).
    """
    # Drop a leading batch dimension: (1, H, W) -> (H, W).
    if mask.ndim == 3 and mask.shape[0] == 1:
        mask = mask[0]

    img_f = image.astype(np.float32)

    if background_mode == "highlight":
        alpha = 0.5
        overlay_color = np.array([255, 0, 0], dtype=np.float32)
        # Index with a boolean array: indexing with an integer 0/1 mask
        # would be interpreted as fancy indexing (selecting rows 0 and 1)
        # and a float mask would raise IndexError.
        selected = mask.astype(bool)
        output_f = img_f.copy()
        output_f[selected] = (
            (1 - alpha) * img_f[selected] + alpha * overlay_color
        )
    else:
        # Expand the mask to a 3-channel float weight of shape (H, W, 3).
        mask_3c = np.repeat(mask[..., None], 3, axis=2).astype(np.float32)
        one_c = 1.0 - mask_3c
        if background_mode == "extreme_blur":
            background = cv2.GaussianBlur(image, (101, 101), 0).astype(
                np.float32
            )
        else:  # "remove"
            background = np.full_like(img_f, 255, dtype=np.float32)
        output_f = img_f * mask_3c + background * one_c

    # Clip before the cast so out-of-range values cannot wrap around.
    return np.clip(output_f, 0, 255).astype(np.uint8)


def resize_image(image, max_width=800):
    """Shrink a numpy image so it is at most ``max_width`` pixels wide.

    The aspect ratio is preserved. Images that are already narrow enough
    are returned unchanged (the same object, not a copy).

    Parameters:
        image: The image as a numpy array, or None.
        max_width: The maximum allowed width in pixels.

    Returns:
        None if ``image`` is None, the original array if no resize is
        needed, otherwise a new array holding the Lanczos-resampled image.
    """
    if image is None:
        return None

    pil_img = Image.fromarray(image)
    width, height = pil_img.size
    if width <= max_width:
        return image

    # Keep at least one row: for very wide, short images the scaled height
    # truncates to 0, which Pillow rejects.
    new_height = max(1, int(height * (max_width / width)))
    resized_img = pil_img.resize((max_width, new_height), Image.LANCZOS)
    return np.array(resized_img)