# UniBioTransfer / my_py_lib/image_util.py
# scy639's picture
# Upload folder using huggingface_hub
# 2b534de verified
import numpy as np
import os,sys,cv2
import PIL
from PIL import Image
from pathlib import Path
def to__image_in_npArr(img):
    """
    Return `img` as an np.ndarray.

    Accepted inputs, checked in this order:
        np.ndarray   -> returned as-is (same object, no copy)
        PIL image    -> np.array(img)
        torch.Tensor -> detached, moved to CPU, converted with .numpy()
    Anything else raises TypeError.

    The original note says this is equivalent to misc_util.to_ndarray.
    """
    if not isinstance(img, np.ndarray):
        if isinstance(img, PIL.Image.Image):
            img = np.array(img)
        else:
            # torch is imported lazily so array / PIL inputs never need it
            import torch
            if not isinstance(img, torch.Tensor):
                raise TypeError("got {}".format(type(img)))
            img = img.detach().cpu().numpy()
    return img
def imgArr_2_objXminYminXmaxYmax(imgArr, bg_color, THRES=5, coarse_bbox=None,diff_type='A'):
    """
    Find the tight bounding box of all pixels that differ from a background color.

    param:
        imgArr: np.array indexed as [row, col, channel]
        bg_color: background color, one value per channel, e.g. a tuple (R, G, B)
        THRES: a pixel counts as foreground only if its difference exceeds THRES
        coarse_bbox: optional (xmin, ymin, xmax, ymax); only
            imgArr[ymin:ymax, xmin:xmax] is searched, and the result is shifted
            back into full-image coordinates
        diff_type:
            'A' -> foreground if ANY channel differs from bg_color by more than THRES
            'B' -> foreground if the SUM of per-channel absolute differences
                   exceeds THRES
    return:
        xmin,ymin,xmax,ymax (type= primitive int,NOT np int).
        xmax / ymax are the indices of the last foreground column / row
        (inclusive), not one past the end.
    raises:
        ValueError: diff_type is neither 'A' nor 'B'
        IndexError: no pixel differs from bg_color by more than THRES
    """
    if diff_type not in ('A', 'B'):
        raise ValueError(f"diff_type must be 'A' or 'B', got {diff_type!r}")

    img_array = np.asarray(imgArr)
    if coarse_bbox is not None:
        xmin_coarse, ymin_coarse, xmax_coarse, ymax_coarse = coarse_bbox
        img_array = img_array[ymin_coarse:ymax_coarse, xmin_coarse:xmax_coarse]

    # Subtract in float64: if both operands are unsigned (e.g. a uint8 image and
    # a uint8 bg_color array) the difference would wrap around instead of going
    # negative, hiding dark objects on a bright background.
    abs_diff = np.abs(
        np.asarray(img_array, dtype=np.float64) - np.asarray(bg_color, dtype=np.float64)
    )
    if diff_type == 'A':
        diff_pixels = np.any(abs_diff > THRES, axis=2)
    else:  # 'B'
        diff_pixels = np.sum(abs_diff, axis=2) > THRES

    # Rows / columns that contain at least one foreground pixel
    row_idx = np.flatnonzero(np.any(diff_pixels, axis=1))
    col_idx = np.flatnonzero(np.any(diff_pixels, axis=0))
    if row_idx.size == 0:
        raise IndexError("no pixel differs from bg_color by more than THRES")

    ymin, ymax = int(row_idx[0]), int(row_idx[-1])
    xmin, xmax = int(col_idx[0]), int(col_idx[-1])
    if coarse_bbox is not None:
        # translate from crop coordinates back to full-image coordinates
        xmin += xmin_coarse
        ymin += ymin_coarse
        xmax += xmin_coarse
        ymax += ymin_coarse
    return xmin, ymin, xmax, ymax
def draw_bbox(img, bbox, color=None, thickness=2,bbox_type='x0y0wh'):
    """
    Return a copy of `img` with one rectangle drawn on it (input is not modified).

    param:
        img: image array; it is copied with np.copy before drawing
        bbox: four numbers, interpreted according to bbox_type
        color: per-channel color values (cast to int); default is (0, 255, 0)
        thickness: passed through to cv2.rectangle
        bbox_type:
            'x0y0wh'   -> bbox = (left, top, width, height)   [default]
            'x0y0x1y1' -> bbox = (xmin, ymin, xmax, ymax)
    return:
        the annotated copy
    raises:
        ValueError: unknown bbox_type
    """
    # Coordinates are rounded to int in BOTH formats so float boxes are drawn
    # consistently (the corner points handed to cv2.rectangle are plain ints).
    if bbox_type == 'x0y0wh':
        left = int(round(bbox[0]))
        top = int(round(bbox[1]))
        right = left + int(round(bbox[2]))
        bottom = top + int(round(bbox[3]))
    elif bbox_type == 'x0y0x1y1':
        left, top, right, bottom = (int(round(v)) for v in bbox)
    else:
        raise ValueError(f"bbox_type must be 'x0y0wh' or 'x0y0x1y1', got {bbox_type!r}")

    if color is not None:
        color = tuple(int(c) for c in color)
    else:
        color = (0, 255, 0)

    img = np.copy(img)
    img = cv2.rectangle(img, (left, top), (right, bottom), color, thickness=thickness)
    return img
def print_image_statistics(
    image,
    reduce_line:bool = 1,  # reduce printed lines by condensing multi-line output
    #
    return_:bool = False,
    print_:bool = True,
):
    """
    Build (and optionally print / return) a text report about an image:
        type (before conversion to np.ndarray)
        dtype and shape
        min, max, mean, median and a sample of unique values for each channel

    If the first axis has length 1, 3 or 4 and the second axis is longer than 13,
    the array is assumed to be channel-first and is transposed to channel-last
    (a 2-D array in that situation raises NotImplementedError). Otherwise, when
    the first axis has length 1, 3 or 4, only a warning is printed.

    reduce_line: truthy -> one line per channel, and the first three newlines
                 of the report are removed so the header collapses onto one line
    return_:     return the report string (otherwise the function returns None)
    print_:      print the report
    """
    pieces = ["----[statistics]----\n", f"type = {type(image)}\n"]
    image = to__image_in_npArr(image)
    pieces.append(f"dtype = {image.dtype}\n")
    pieces.append(f"shape = {image.shape}\n")

    if image.shape[0] in (1, 3, 4):
        if image.shape[1] <= 13:
            print("[warning] the first axis might be the channel dimension")
        else:
            print("Assuming the first axis is channel", end=' ')
            if image.ndim == 2:
                raise NotImplementedError
            image = image.transpose(1, 2, 0)
            print(f"transposed {image.shape=}")

    # One 2-D plane per channel (a 2-D image is its own single channel)
    planes = [image] if image.ndim == 2 else [image[:, :, k] for k in range(image.shape[-1])]

    def _fmt(values):
        return " ".join([f"{x:.3f}" for x in values])

    for idx, plane in enumerate(planes):
        uniq = np.unique(plane)
        if len(uniq) > 6:
            # too many to list: show the 3 smallest and the 3 largest
            uniq_text = _fmt(uniq[:3]) + ' .. ' + _fmt(uniq[-3:])
        else:
            uniq_text = _fmt(uniq)

        if reduce_line:
            pieces.append(
                f"Channel {idx}: Min={np.min(plane):<8.2f} Max={np.max(plane):<8.2f} Mean={np.mean(plane):<8.2f} Median={np.median(plane):<8.2f} Unique={uniq_text}\n"
            )
        else:
            pieces.append(f"\nChannel {idx}:\n")
            pieces.append(f" Min: {np.min(plane)}\n")
            pieces.append(f" Max: {np.max(plane)}\n")
            pieces.append(f" Mean: {np.mean(plane)}\n")
            pieces.append(f" Median: {np.median(plane)}\n")
            pieces.append(f" Unique values: {uniq_text}\n")

    string = "".join(pieces)
    if reduce_line:
        # drop the first three newline characters of the report
        string = string.replace('\n', '', 3)
    string = string.replace('\n', '\n|')
    string += "----[statistics]over----\n"

    if print_:
        print(string)
    if return_:
        return string
def pad_around_center(img, new_size, fill_value=255):
    """
    Pad an image to a new size, keeping the original content centered.

    param:
        img: anything accepted by to__image_in_npArr; must be 3-dimensional
             and is unpacked as (height, width, channels)
        new_size: (new_height, new_width); each must be >= the current size
        fill_value: scalar written into the padded border on every channel.
            Defaults to 255 (white for uint8 images).
    return:
        np.ndarray of shape (new_height, new_width, channels)

    When the extra space is odd, the additional row / column goes to the
    bottom / right side.
    """
    img = to__image_in_npArr(img)
    assert len(img.shape) == 3, f"expected a 3-dimensional image, got shape {img.shape}"
    assert len(new_size) == 2, f"new_size must be (new_height, new_width), got {new_size}"

    # compute padding
    height, width, _ = img.shape
    new_height, new_width = new_size
    assert new_height >= height, f"new_height {new_height} < image height {height}"
    assert new_width >= width, f"new_width {new_width} < image width {width}"
    pad_height = new_height - height
    pad_width = new_width - width
    pad_top = pad_height // 2
    pad_bottom = pad_height - pad_top
    pad_left = pad_width // 2
    pad_right = pad_width - pad_left

    # pad image; the channel axis is never padded
    img = np.pad(
        img,
        pad_width=((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
        mode="constant",
        constant_values=fill_value,
    )
    return img
def norm_min0max255_image_per_channel(image, ):
    """
    Stretch each RGB channel independently so its min maps to 0 and its max to 255.

    Args:
        image: file path (str or os.PathLike) or PIL Image object.
               The image is converted to 'RGB' first.
    Returns:
        normalized PIL Image (uint8, RGB)
    Raises:
        AssertionError: a stretched channel does not end up within +-3 of
            [0, 255] (internal self-check; image statistics are printed first)

    A channel whose values are all identical cannot be stretched; it is left
    unchanged and excluded from the self-check.
    """
    if isinstance(image, (str, os.PathLike)):
        # context manager so the file handle is released right after decoding
        with Image.open(image) as opened:
            img_pil = opened.convert('RGB')
    else:
        img_pil = image.convert('RGB')
    img_array = np.array(img_pil).astype(np.float32)

    stretched_channels = []
    for channel in range(3):
        channel_data = img_array[:, :, channel]
        c_min = np.min(channel_data)
        c_max = np.max(channel_data)
        if c_max > c_min:
            img_array[:, :, channel] = (channel_data - c_min) * (255.0 / (c_max - c_min))
            stretched_channels.append(channel)
        # else: fallback when all channel values are identical -> keep as is

    img_array = np.clip(img_array, 0, 255).astype(np.uint8)

    # Self-check, only for channels that were actually stretched: a constant
    # channel can never span 0..255. Compare as Python ints so uint8 scalar
    # arithmetic cannot wrap around.
    for c in stretched_channels:
        channel_data = img_array[:, :, c]
        c_min = int(np.min(channel_data))
        c_max = int(np.max(channel_data))
        # allow up to ±3 absolute error
        if abs(c_min - 0) > 3 or abs(c_max - 255) > 3:
            print_image_statistics(img_array)
            raise AssertionError(
                f"channel {c} normalized to [{c_min}, {c_max}], expected about [0, 255]"
            )

    img_pil = Image.fromarray(img_array)
    return img_pil
def imgs_2_grid_A(
    imgs,  # list of RGB images (PIL or numpy arrays)
    # any provided mask makes masked pixels lighter across images
    masks=None,
    # if provided, save grid
    grid_path=None,
    # other settings
    downsample=1,  # downsample factor for the grid
    inv_mask:bool=False,
    resize_mode:str=None,  # None | 'mask_to_img' | 'img_to_mask' (resize img to match mask shape)
    grid_layout:str="row",  # 'row' | 'column' |'square'
    auto_pad_if_not_same_size=True,
    verbose :int = 1,
):
    """
    Create one grid image from a list of images, optionally with masks overlaid.

    param:
        imgs: PIL images, or anything to__image_in_npArr accepts (np.ndarray,
            torch.Tensor). PIL modes other than RGB / RGBA (e.g. grayscale)
            are converted to RGB.
        masks: optional sequence indexed like imgs (PIL image or np.ndarray,
            converted to mode 'L'). Each image becomes 0.3 * img + 0.7 * mask,
            so masked pixels get lighter.
        grid_path: if given, the grid is saved there; only the direct parent
            directory is created if missing.
        downsample: integer-divide the final grid width and height by this
            factor when > 1.
        inv_mask: use (1 - mask) instead of mask.
        resize_mode: how to reconcile image / mask sizes (None = no resizing).
        grid_layout: 'row' (one row) | 'column' (one column) |
            'square' (int(sqrt(n)) images per row).
        auto_pad_if_not_same_size: despite the name, images whose size differs
            from the first image are RESIZED (bilinear) to the first image's
            height and width.
        verbose: > 0 prints the shape of every non-PIL input; > -1 prints the
            saved path.
    return:
        the grid as a PIL image
    raises:
        ValueError: imgs is empty
        TypeError: an image type is not supported (from to__image_in_npArr)
        NotImplementedError: unknown resize_mode or grid_layout
    """
    imgs = list(imgs)
    if len(imgs) == 0:
        raise ValueError("imgs is empty; nothing to put in the grid")

    from pathlib import Path
    import PIL.Image
    import numpy as np
    import torchvision.utils as vutils
    import torch

    images = []
    for i, img in enumerate(imgs):
        if not isinstance(img, PIL.Image.Image):
            # Convert first: unsupported types then fail with TypeError instead
            # of an AttributeError from the debug print below.
            img = to__image_in_npArr(img)
            if verbose>0:
                print(f"{img.shape=}")
            if img.ndim == 3 and img.shape[2] == 1:
                img = img[:, :, 0]  # H x W x 1 -> H x W so it becomes a grayscale image
            img = PIL.Image.fromarray(img)
        if img.mode not in ('RGB', 'RGBA'):
            # the transpose below needs a channel axis
            img = img.convert('RGB')
        img_tensor = torch.tensor(np.array(img).transpose(2, 0, 1)) / 255.0

        if masks is not None:
            mask = masks[i]
            if isinstance(mask, np.ndarray):
                mask = PIL.Image.fromarray(mask)
            if not mask.mode == 'L':
                mask = mask.convert('L')
            if resize_mode is None:
                pass
            elif resize_mode == "img_to_mask":
                img_tensor = torch.nn.functional.interpolate(
                    img_tensor.unsqueeze(0),
                    size=(mask.height, mask.width),
                    mode='bilinear',
                    align_corners=False
                ).squeeze(0)
            elif resize_mode == "mask_to_img":
                mask = mask.resize((img_tensor.shape[2], img_tensor.shape[1]), PIL.Image.BILINEAR)
            else:
                raise NotImplementedError
            mask_np = np.array(mask) / 255.0
            mask_tensor = torch.tensor(mask_np).unsqueeze(0).repeat(3, 1, 1)
            if inv_mask:
                mask_tensor = 1 - mask_tensor
            # make masked pixels lighter
            img_tensor = img_tensor * 0.3 + 0.7 * mask_tensor

        # Bring later images to the size of the first one if needed
        if auto_pad_if_not_same_size and i > 0 and (img_tensor.shape[1] != images[0].shape[1] or img_tensor.shape[2] != images[0].shape[2]):
            img_tensor = torch.nn.functional.interpolate(
                img_tensor.unsqueeze(0),
                size=(images[0].shape[1], images[0].shape[2]),
                mode='bilinear',
                align_corners=False
            ).squeeze(0)
        images.append(img_tensor)

    if grid_layout == "row":
        grid_tensor = vutils.make_grid(images, nrow=len(images), )
    elif grid_layout == "column":
        grid_tensor = vutils.make_grid(images, nrow=1, )
    elif grid_layout == "square":
        grid_tensor = vutils.make_grid(images, nrow=int(np.sqrt(len(images))), )
    else:
        raise NotImplementedError

    grid = grid_tensor.numpy().transpose(1, 2, 0)
    # Round to nearest before the uint8 cast: v / 255 * 255 can land just below
    # v in floating point, and plain truncation would darken that pixel by 1.
    grid = PIL.Image.fromarray(np.clip(np.rint(grid * 255), 0, 255).astype(np.uint8))

    if downsample > 1:
        original_size = grid.size
        new_size = (
            max(1, int(original_size[0] // downsample)),
            max(1, int(original_size[1] // downsample)),
        )
        grid = grid.resize(new_size, PIL.Image.LANCZOS)

    if grid_path is not None:
        grid_path = Path(grid_path)
        grid_path.parent.mkdir(parents=False, exist_ok=True)
        grid.save(grid_path)
        if verbose>-1:
            print(f"saved {grid_path}")
    return grid
def img_paths_2_grid_A(
    paths,  # paths of rgb img
    # any mask option makes masked pixels lighter per image
    mask_paths=None,
    path_img_2_path_mask=None,  # callback to convert RGB image path to mask path
    # if provided, save grid
    grid_path=None,
    # other settings
    downsample=1,  # downsample factor for the grid
    inv_mask:bool=False,
    resize_mode:str=None,  # None | 'mask_to_img' | 'img_to_mask' (resize image to match mask shape)
    grid_layout:str="row",  # 'row' | 'column' |'square'
    auto_pad_if_not_same_size=True,
):
    """
    Load images (and optionally masks) from disk and hand them to imgs_2_grid_A.

    Images are opened as 'RGB'. Masks are opened as 'L' and come from
    `mask_paths` when it is given; otherwise from
    `path_img_2_path_mask(path)` for every image path when that callback is
    given; otherwise no masks are used. All remaining arguments are forwarded
    unchanged, and the return value is whatever imgs_2_grid_A returns.
    """
    import PIL.Image

    def _load(file_path, mode):
        return PIL.Image.open(file_path).convert(mode)

    # Images first, then masks
    imgs = []
    for path in paths:
        imgs.append(_load(path, 'RGB'))

    if mask_paths is not None:
        masks = []
        for mask_path in mask_paths:
            masks.append(_load(mask_path, 'L'))
    elif path_img_2_path_mask is not None:
        masks = []
        for path in paths:
            masks.append(_load(path_img_2_path_mask(path), 'L'))
    else:
        masks = None

    forwarded = dict(
        imgs=imgs,
        masks=masks,
        grid_path=grid_path,
        downsample=downsample,
        inv_mask=inv_mask,
        resize_mode=resize_mode,
        grid_layout=grid_layout,
        auto_pad_if_not_same_size=auto_pad_if_not_same_size,
    )
    return imgs_2_grid_A(**forwarded)
def save_any_A(
    a,
    path=None,  # only valid when !dont_save
    dont_save = False,
    # log
    print_info :bool = True,
    value_range: tuple = None,  # (min, max) tuple to specify value range, if None then auto determine
):
    """
    Visualize (and by default save) almost any image-like array or tensor.

    can auto determine or specify by param:
        data shape mode:
            ...,1/3/4,h,w ; ...,h,w,1/3/4 ; h,w
            (leading extra dimensions are dropped by taking index 0)
        value range:
            0-1 ; -1~1 ; 0-255
    after scaling to 0-255, build a grid containing two images:
        scaled image
        contrast-adjusted scaled image via linear transform so min=0 and max=255

    param:
        a: anything accepted by to__image_in_npArr; the input is copied
        path: output file. If None (and saving is enabled), a timestamped file
            is written under /tmp/scy_auto_save (jpg for <= 3 channels, else png)
        dont_save: build and return the grid without writing a file
        print_info: print a summary of the data via custom_repr_v3
        value_range: (min, max) of the data; None -> guessed from dtype and
            statistics, and the guess is printed
    return:
        the grid returned by imgs_2_grid_A
    raises:
        ValueError: input has fewer than 2 dimensions, or value_range has
            max == min
    """
    a = to__image_in_npArr(a)
    a = a.copy()
    if print_info:
        import torch; from .torch_util import custom_repr_v3
        print(custom_repr_v3(torch.Tensor(a)))
    if a.ndim < 2:
        raise ValueError(f"expected at least a 2-D array, got shape {a.shape}")
    while a.ndim > 3:
        a = a[0]
    #-----------now a is chw | hwc | hw --------------------------------------------------------
    if a.ndim > 2:
        first = a.shape[-3]
        # Heuristic: a first axis that is small (<= 4) and not larger than both
        # other axes is taken to be the channel axis
        if first <= 4 and first <= a.shape[-1] and first <= a.shape[-2]:
            a = a.transpose(1, 2, 0)  # chw -> hwc
    else:  # ndim==2
        a = np.expand_dims(a, axis=-1)  # hw -> hwc
    #-----------now a is hwc --------------------------------------------------------
    if value_range is None:  # Auto determine
        mean = np.mean(a)
        min_ = np.min(a)
        max_ = np.max(a)
        if a.dtype == np.uint8 or a.dtype == np.int32 or a.dtype == np.int64:
            range_ = (0, 255)
        elif a.dtype == bool:
            range_ = (0, 1)
        elif max_ > 100:
            range_ = (0, 255)
        elif mean > 1:
            range_ = (0, 255)
        elif min_ <= -1 or mean < 0:  # treat as range -1 to 1
            range_ = (-1, 1)
        else:  # treat as range 0 to 1
            range_ = (0, 1)
        print(f"Auto determined {range_=}")
    else:
        range_ = value_range
    range_min, range_max = range_
    if a.dtype == bool:
        a = a.astype(np.uint8) * 255  # bool -> 0/255
    elif not (range_min == 0 and range_max == 255):
        if range_max == range_min:
            raise ValueError(f"value_range must have max != min, got {range_}")
        # Custom range, normalize to 0~255
        a = (a - range_min) / (range_max - range_min) * 255
    #-----------now a is hwc and scaled to 0~255 --------------------------------------------------------
    if a.shape[-1] == 1:
        a = np.repeat(a, 3, axis=-1)
    #-----------now a is hwc, 0~255, and channels==3/4 --------------------------------------------------------
    # contrast-adjusted version: linearly map the current min to 0 and max to 255
    a_contrast = a.copy().astype(np.float32)
    current_min = np.min(a_contrast)
    current_max = np.max(a_contrast)
    if current_max > current_min:  # avoid division by zero
        a_contrast = (a_contrast - current_min) / (current_max - current_min) * 255
    # Round to nearest before the uint8 cast; plain truncation turns e.g.
    # 199.99998 into 199 and darkens the image by one level.
    a = np.clip(a, 0, 255).round().astype(np.uint8)
    a_contrast = np.clip(a_contrast, 0, 255).round().astype(np.uint8)

    if dont_save:
        path = None
    else:
        if path is None:
            save_dir = Path("/tmp/scy_auto_save")
            save_dir.mkdir(exist_ok=True)
            import time
            timestamp = int(time.time() * 1000)  # milliseconds for uniqueness
            ext = "jpg" if a.shape[-1] <= 3 else "png"  # Use jpg by default if num channels <= 3
            path = save_dir / f"auto_{timestamp}.{ext}"
        else:
            path = Path(path)
            path.parent.mkdir(exist_ok=True)
        path = str(path)
    grid = imgs_2_grid_A(  # create grid with 2 images: original scaled + contrast adjusted
        imgs=[a, a_contrast],
        grid_path=path,
        grid_layout="row",
        verbose = -1,
    )
    if not dont_save: print(f"{path}")
    return grid