object-memory / core /image_processing.py
russ4stall
fresh history
24f3fb6
import numpy as np
import cv2
from PIL import Image
import base64
import io
from typing import List
def encode_image_to_base64(img_array: np.ndarray):
    """
    Serialize an image array as a base64 string of its PNG encoding.

    Parameters:
        img_array: The image as a numpy array. Floating-point arrays are
            scaled by 255 and clipped to [0, 255] before being cast to uint8;
            arrays of any other dtype are handed to PIL unchanged.
    Returns:
        The PNG bytes, base64-encoded and decoded to a UTF-8 string.
    """
    is_float = np.issubdtype(img_array.dtype, np.floating)
    # Float input is scaled by 255 before the uint8 cast
    # (presumably values are in [0, 1] - confirm against callers).
    pixels = (
        np.clip(img_array * 255, 0, 255).astype(np.uint8)
        if is_float
        else img_array
    )

    with io.BytesIO() as png_buffer:
        Image.fromarray(pixels).save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    return base64.b64encode(png_bytes).decode('utf-8')
def crop_to_mask_size(output, mask):
    """
    Trim an image down to the tight bounding box of a mask's non-zero region.

    Parameters:
        output: The image to be cropped (HxWxC array).
        mask: The binary mask, shape (H, W) or (1, H, W).
    Returns:
        The slice of `output` covering the mask's bounding box (inclusive of
        its last row and column), or `output` itself when the mask has no
        non-zero entries.
    """
    # Drop a leading singleton batch axis so the mask is plain (H, W).
    has_batch_axis = mask.ndim == 3 and mask.shape[0] == 1
    if has_batch_axis:
        mask = mask[0]

    rows, cols = np.nonzero(mask)

    # Empty mask: there is nothing to crop to, so hand the image back as-is.
    if rows.size == 0 or cols.size == 0:
        return output

    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()
    return output[top:bottom + 1, left:right + 1]
def apply_mask(image, mask, background_mode="remove"):
    """
    Applies the given mask to the image based on the specified background mode.

    Parameters:
        image: The input image (HxWx3 array).
        mask: The binary mask, shape (H, W) or (1, H, W). For "highlight",
            every non-zero entry is treated as selected, so bool, integer and
            float masks all work. For the other modes the mask values are
            used directly as per-pixel blend weights, so they are expected
            to be 0/1 (or bool).
        background_mode: How to render the result:
            "extreme_blur" - keep the masked region and replace the rest with
                a Gaussian-blurred (101x101 kernel) copy of the image;
            "highlight" - blend the masked region 50/50 with pure red and
                leave the rest untouched;
            anything else (default "remove") - keep the masked region and
                paint the rest white.
    Returns:
        Processed image (HxWx3 uint8 array).
    """
    image = np.asarray(image)
    mask = np.asarray(mask)

    # If the mask has a leading batch dimension, squeeze it -> (H, W)
    if mask.ndim == 3 and mask.shape[0] == 1:
        mask = mask[0]

    img_f = image.astype(np.float32)

    if background_mode == "highlight":
        alpha = 0.5
        overlay_color = np.array([255, 0, 0], dtype=np.float32)  # pure red
        # Index with a genuine boolean mask: an integer 0/1 mask would
        # otherwise be interpreted as row indices (fancy indexing) and tint
        # whole rows 0 and 1 instead of the masked pixels.
        selected = mask.astype(bool)
        output_f = img_f.copy()
        output_f[selected] = (1 - alpha) * img_f[selected] + alpha * overlay_color
    else:
        # (H, W, 1) weights broadcast across the colour channels.
        weight = mask[..., None].astype(np.float32)
        if background_mode == "extreme_blur":
            background = cv2.GaussianBlur(image, (101, 101), 0).astype(np.float32)
        else:  # "remove" (also the fallback for unrecognised modes)
            background = np.full_like(img_f, 255, dtype=np.float32)
        output_f = img_f * weight + background * (1.0 - weight)

    # Clamp before the cast so out-of-range values saturate rather than wrap.
    return np.clip(output_f, 0, 255).astype(np.uint8)
def resize_image(image, max_width=800):
    """
    Downscale an image so its width does not exceed `max_width`, preserving
    the aspect ratio. Images that are already narrow enough are returned
    unchanged (the same object, not a copy).

    Parameters:
        image: The image as a numpy array (HxW or HxWxC), or None.
        max_width: Maximum allowed width in pixels (default 800).
    Returns:
        None if `image` is None; the original `image` if its width is at most
        `max_width`; otherwise a new numpy array resized with Lanczos
        resampling to `max_width` pixels wide.
    """
    if image is None:
        return None

    # Read the dimensions from the array itself so the common "already small
    # enough" case does not pay for a round trip through PIL.
    pixels = np.asarray(image)
    height, width = pixels.shape[:2]
    if width <= max_width:
        return image

    # Very wide, short images can scale to a height below one pixel; clamp to
    # 1 so the resize target is never zero-sized.
    new_height = max(1, int(height * (max_width / width)))
    resized_img = Image.fromarray(pixels).resize((max_width, new_height), Image.LANCZOS)
    return np.array(resized_img)