# File size: 16,504 Bytes

# 2b534de

import numpy as np
import os,sys,cv2
import PIL
from PIL import Image
from pathlib import Path

def to__image_in_npArr(img):
    """
    Return `img` as an np.ndarray.

    np.ndarray inputs are returned unchanged (same object, no copy), PIL
    images go through np.array, and torch tensors are detached, moved to the
    CPU and converted with .numpy(). Anything else raises TypeError.
    Equivalent to misc_util.to_ndarray
    """
    if not isinstance(img, np.ndarray):
        if isinstance(img, PIL.Image.Image):
            img = np.array(img)
        else:
            # torch is imported lazily: only inputs that are neither ndarray
            # nor PIL image ever reach this point
            import torch
            if not isinstance(img, torch.Tensor):
                raise TypeError("got {}".format(type(img)))
            img = img.detach().cpu().numpy()
    return img
def imgArr_2_objXminYminXmaxYmax(imgArr, bg_color, THRES=5, coarse_bbox=None,diff_type='A'):
    """
    Find the tight bounding box of all pixels that differ from a background color.

    param:
        imgArr: np.array indexed as [row, col, channel]
        bg_color: background color, one value per channel, e.g. a tuple (R, G, B)
        THRES: a pixel counts as foreground when its difference to bg_color
            exceeds THRES (see diff_type)
        coarse_bbox: optional (xmin, ymin, xmax, ymax); only the slice
            imgArr[ymin:ymax, xmin:xmax] is searched
        diff_type: 'A' -> any single channel differs by more than THRES
                   'B' -> the sum of absolute channel differences exceeds THRES
    return:
        xmin,ymin,xmax,ymax (type= primitive int,NOT np int) in full-image
        coordinates; xmax / ymax are the index of the last foreground
        column / row (inclusive)
    raise:
        ValueError: diff_type is neither 'A' nor 'B'
        IndexError: no pixel differs from the background
    """
    if diff_type not in ('A', 'B'):
        raise ValueError(f"unknown diff_type {diff_type!r}, expected 'A' or 'B'")

    img_array = imgArr
    x_offset = y_offset = 0
    if coarse_bbox is not None:
        xmin_coarse, ymin_coarse, xmax_coarse, ymax_coarse = coarse_bbox
        img_array = img_array[ymin_coarse:ymax_coarse, xmin_coarse:xmax_coarse]
        # int(): keep the result a plain Python int even for numpy bbox values
        x_offset, y_offset = int(xmin_coarse), int(ymin_coarse)

    # Subtract in float64 so unsigned inputs (e.g. a uint8 image together with
    # a uint8 bg_color array) cannot wrap around and fake foreground pixels.
    abs_diff = np.abs(
        np.asarray(img_array, dtype=np.float64) - np.asarray(bg_color, dtype=np.float64)
    )
    if diff_type == 'A':
        diff_pixels = np.any(abs_diff > THRES, axis=2)
    else:
        diff_pixels = np.sum(abs_diff, axis=2) > THRES

    # First / last row and column that contain at least one foreground pixel
    rows = np.any(diff_pixels, axis=1)
    cols = np.any(diff_pixels, axis=0)
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]

    return (
        int(xmin) + x_offset,
        int(ymin) + y_offset,
        int(xmax) + x_offset,
        int(ymax) + y_offset,
    )
def draw_bbox(img, bbox, color=None, thickness=2,bbox_type='x0y0wh'):
    """
    Draw one rectangle on a copy of `img` and return the copy.

    param:
        img: image array; it is copied with np.copy, the input is not modified
        bbox: box coordinates, interpreted according to bbox_type
        color: per-channel color values (each cast to int); default (0, 255, 0)
        thickness: line thickness forwarded to cv2.rectangle
        bbox_type: 'x0y0wh'   -> bbox = (left, top, width, height)
                   'x0y0x1y1' -> bbox = (xmin, ymin, xmax, ymax)
    return:
        the copied image with the rectangle drawn on it
    raise:
        ValueError: bbox_type is not one of the two supported layouts
    """
    img = np.copy(img)
    if color is not None:
        color = [int(c) for c in color]
    else:
        color = (0, 255, 0)

    if bbox_type == 'x0y0wh':
        left = int(round(bbox[0]))
        top = int(round(bbox[1]))
        right = left + int(round(bbox[2]))
        bottom = top + int(round(bbox[3]))
    elif bbox_type == 'x0y0x1y1':
        # Round here as well: cv2.rectangle expects integer corner points,
        # and the 'x0y0wh' branch already accepts float boxes.
        left, top, right, bottom = bbox
        left, top = int(round(left)), int(round(top))
        right, bottom = int(round(right)), int(round(bottom))
    else:
        raise ValueError(
            f"unknown bbox_type {bbox_type!r}, expected 'x0y0wh' or 'x0y0x1y1'"
        )

    img = cv2.rectangle(img, (left, top), (right, bottom), color, thickness=thickness)
    return img




def print_image_statistics(
    image,
    reduce_line:bool = 1, # reduce printed lines by condensing multi-line output
    #
    return_:bool = False,
    print_:bool = True,
):
    """
    Build (and optionally print / return) a text report about an image:
        type
        dtype and shape
        min, max, mean, median, unique values for each channel

    param:
        image: anything accepted by to__image_in_npArr
        reduce_line: truthy -> one line per channel and the header fields
            joined onto a single line; falsy -> one line per statistic
        return_: return the report string (otherwise the function returns None)
        print_: print the report; the layout diagnostics ("Assuming the first
            axis is channel ...", "[warning] ...") are printed only when this
            is set, so print_=False is really silent

    Layout heuristic: when the first axis has size 1/3/4 and the second axis
    is longer than 13, the array is treated as channel-first and transposed
    to (H, W, C) before the per-channel statistics are computed.

    raise:
        NotImplementedError: 2-D array that matches the channel-first heuristic
    """
    string = "----[statistics]----\n"
    string += f"type = {type(image)}\n"
    image = to__image_in_npArr(image)
    string += f"dtype = {image.dtype}\n"
    string += f"shape = {image.shape}\n"

    if image.shape[0] in (1, 3, 4):
        if image.shape[1] > 13:
            if print_:
                print("Assuming the first axis is channel", end=' ')
            if len(image.shape) == 2:
                raise NotImplementedError(
                    "2-D array whose first axis has size 1/3/4 is ambiguous "
                    "(channel-first vs. single-channel image)"
                )
            image = image.transpose(1, 2, 0)
            if print_:
                print(f"transposed {image.shape=}")
        elif print_:
            print("[warning] the first axis might be the channel dimension")

    if len(image.shape) == 2:
        channels = [image]
    else:
        channels = [image[:, :, i] for i in range(image.shape[-1])]

    _N = 6  # show at most _N unique values: the _N//2 smallest and _N//2 largest
    half = _N // 2
    for i, channel in enumerate(channels):
        uniques = np.unique(channel)
        if len(uniques) > _N:
            s_uniques = " ".join([f"{x:.3f}" for x in uniques[:half]])
            s_uniques += ' .. '
            s_uniques += " ".join([f"{x:.3f}" for x in uniques[-half:]])
        else:
            s_uniques = " ".join([f"{x:.3f}" for x in uniques])

        # Compute each statistic once and reuse it in either layout
        c_min = np.min(channel)
        c_max = np.max(channel)
        c_mean = np.mean(channel)
        c_median = np.median(channel)
        if not reduce_line:
            string += f"\nChannel {i}:\n"
            string += f"  Min: {c_min}\n"
            string += f"  Max: {c_max}\n"
            string += f"  Mean: {c_mean}\n"
            string += f"  Median: {c_median}\n"
            string += f"  Unique values: {s_uniques}\n"
        else:
            string += f"Channel {i}: Min={c_min:<8.2f} Max={c_max:<8.2f} Mean={c_mean:<8.2f} Median={c_median:<8.2f} Unique={s_uniques}\n"

    if reduce_line:
        # Drop the first three newlines so the title/type/dtype/shape header
        # collapses onto one line.
        string = string.replace('\n', '', 3)
    string = string.replace('\n', '\n|')
    string += "----[statistics]over----\n"
    if print_:
        print(string)
    if return_:
        return string

def pad_around_center(img, new_size, fill_value=255):
    """
    Pad an image to `new_size`, keeping the original content centered.

    param:
        img: anything accepted by to__image_in_npArr; the resulting array must
            be 3-dimensional and is read as (height, width, channels)
        new_size: (new_height, new_width); each must be >= the current size
        fill_value: scalar written into the padded border; the default 255
            is white for 8-bit images
    return:
        np.ndarray of shape (new_height, new_width, channels)

    When the amount of padding is odd, the extra row / column goes to the
    bottom / right side.
    """
    img = to__image_in_npArr(img)
    assert len(img.shape) == 3, f"expected an H x W x C image, got shape {img.shape}"
    assert len(new_size) == 2, f"new_size must be (height, width), got {new_size}"

    # compute padding
    height, width, _ = img.shape
    new_height, new_width = new_size
    assert new_height >= height, f"new height {new_height} < image height {height}"
    assert new_width >= width, f"new width {new_width} < image width {width}"
    pad_top = (new_height - height) // 2
    pad_bottom = (new_height - height) - pad_top
    pad_left = (new_width - width) // 2
    pad_right = (new_width - width) - pad_left

    # pad the two spatial axes only; the channel axis is left untouched
    return np.pad(
        img,
        pad_width=((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
        mode="constant",
        constant_values=fill_value,
    )



def norm_min0max255_image_per_channel(image,  ):
    """
    Stretch every RGB channel independently so that its minimum maps to 0 and
    its maximum maps to 255.

    A channel whose pixels are all identical cannot be stretched; it is left
    unchanged and excluded from the final sanity check.

    Args:
        image: file path (str or os.PathLike) or PIL Image object; it is
            converted to RGB before normalizing

    Returns:
        normalized PIL Image

    Raises:
        AssertionError: a stretched channel ends up more than 3 levels away
            from 0 or 255 (internal sanity check)
    """
    if isinstance(image, (str, os.PathLike)):
        # convert() returns a new image, so the file can be closed right away
        with Image.open(image) as opened:
            img_pil = opened.convert('RGB')
    else:
        img_pil = image.convert('RGB')

    img_array = np.array(img_pil).astype(np.float32)

    stretched_channels = []
    for channel in range(3):
        channel_data = img_array[:, :, channel]
        c_min = np.min(channel_data)
        c_max = np.max(channel_data)
        if c_max > c_min:
            img_array[:, :, channel] = (channel_data - c_min) * (255.0 / (c_max - c_min))
            stretched_channels.append(channel)
        # else: all values identical -> nothing to stretch, keep as is

    img_array = np.clip(img_array, 0, 255).astype(np.uint8)

    # Sanity check, only for channels that were actually stretched
    # (a constant channel can never satisfy min=0 and max=255).
    for channel in stretched_channels:
        channel_data = img_array[:, :, channel]
        # Plain ints: arithmetic on uint8 scalars could wrap around
        c_min = int(np.min(channel_data))
        c_max = int(np.max(channel_data))
        # allow up to ±3 absolute error
        if abs(c_min - 0) > 3 or abs(c_max - 255) > 3:
            print_image_statistics(img_array)
            raise AssertionError(
                f"channel {channel} normalized to [{c_min}, {c_max}] instead of [0, 255]"
            )

    return Image.fromarray(img_array)
            
def imgs_2_grid_A(
    imgs, # list of RGB images (PIL or numpy arrays)
    # any provided mask makes masked pixels lighter across images
    masks=None,
    # if provided, save grid
    grid_path=None,
    # other settings
    downsample=1, # downsample factor for the grid
    inv_mask:bool=False,
    resize_mode:str=None, # None | 'mask_to_img' | 'img_to_mask' (resize img to match mask shape)
    grid_layout:str="row", # 'row' | 'column' |'square'
    auto_pad_if_not_same_size=True,
    verbose :int = 1,
):
    """
    Tile in-memory images into one PIL grid image, optionally blending a mask
    into every tile, and optionally save the result.

    Per image: convert to a CHW tensor divided by 255; when `masks` is given,
    blend as 0.3 * image + 0.7 * mask (mask inverted first if `inv_mask`);
    when `auto_pad_if_not_same_size` is set, bilinearly resize every later
    tile to the spatial size of the first one (note: a resize, not a pad).
    Tiles are arranged with torchvision.utils.make_grid, the grid is shrunk
    by `downsample` when it is > 1, written to `grid_path` when given, and
    returned as a PIL image.
    """
    from pathlib import Path
    import PIL.Image
    import numpy as np
    import torchvision.utils as vutils
    import torch

    images = []
    for idx, img in enumerate(imgs):
        # --- normalise the input to a PIL image ---
        if not isinstance(img, PIL.Image.Image):
            if verbose > 0:
                print(f"{img.shape=}")
            img = to__image_in_npArr(img)
        if isinstance(img, np.ndarray):
            img = PIL.Image.fromarray(img)
        if not isinstance(img, PIL.Image.Image):
            raise TypeError(f"Images must be PIL Image or numpy array{type(img)}")

        # HWC array -> CHW tensor, divided by 255
        chw = np.array(img).transpose(2, 0, 1)
        img_tensor = torch.tensor(chw) / 255.0

        # --- optional mask overlay ---
        if masks is not None:
            mask = masks[idx]
            if isinstance(mask, np.ndarray):
                mask = PIL.Image.fromarray(mask)
            if mask.mode != 'L':
                mask = mask.convert('L')

            if resize_mode == "img_to_mask":
                img_tensor = torch.nn.functional.interpolate(
                    img_tensor.unsqueeze(0),
                    size=(mask.height, mask.width),
                    mode='bilinear',
                    align_corners=False,
                ).squeeze(0)
            elif resize_mode == "mask_to_img":
                target_wh = (img_tensor.shape[2], img_tensor.shape[1])
                mask = mask.resize(target_wh, PIL.Image.BILINEAR)
            elif resize_mode is not None:
                raise NotImplementedError

            overlay = torch.tensor(np.array(mask) / 255.0).unsqueeze(0).repeat(3, 1, 1)
            if inv_mask:
                overlay = 1 - overlay
            # make masked pixels lighter
            img_tensor = img_tensor * 0.3 + 0.7 * overlay

        # --- bring later tiles to the size of the first tile ---
        if auto_pad_if_not_same_size and idx > 0:
            ref_h, ref_w = images[0].shape[1], images[0].shape[2]
            if img_tensor.shape[1] != ref_h or img_tensor.shape[2] != ref_w:
                img_tensor = torch.nn.functional.interpolate(
                    img_tensor.unsqueeze(0),
                    size=(ref_h, ref_w),
                    mode='bilinear',
                    align_corners=False,
                ).squeeze(0)
        images.append(img_tensor)

    # make_grid's `nrow` is the number of tiles placed in each row
    if grid_layout == "row":
        tiles_per_row = len(images)
    elif grid_layout == "column":
        tiles_per_row = 1
    elif grid_layout == "square":
        tiles_per_row = int(np.sqrt(len(images)))
    else:
        raise NotImplementedError
    grid_tensor = vutils.make_grid(images, nrow=tiles_per_row)

    # CHW float tensor -> HWC uint8 PIL image
    grid_hwc = grid_tensor.numpy().transpose(1, 2, 0)
    grid = PIL.Image.fromarray((grid_hwc * 255).astype(np.uint8))

    if downsample > 1:
        full_w, full_h = grid.size
        grid = grid.resize((full_w // downsample, full_h // downsample), PIL.Image.LANCZOS)

    if grid_path is not None:
        grid_path = Path(grid_path)
        grid_path.parent.mkdir(parents=False, exist_ok=True)
        grid.save(grid_path)
        if verbose > -1:
            print(f"saved {grid_path}")

    return grid

def img_paths_2_grid_A(
    paths, # paths of rgb img
    # any mask option makes masked pixels lighter per image
    mask_paths=None,
    path_img_2_path_mask=None, # callback to convert RGB image path to mask path
    # if provided, save grid
    grid_path=None,
    # other settings
    downsample=1, # downsample factor for the grid
    inv_mask:bool=False,
    resize_mode:str=None, # None | 'mask_to_img' | 'img_to_mask' (resize image to match mask shape)
    grid_layout:str="row", # 'row' | 'column' |'square'
    auto_pad_if_not_same_size=True,
):
    """
    Load images (and optionally masks) from disk and build a grid with
    imgs_2_grid_A.

    param:
        paths: iterable of image paths; every image is loaded as RGB
        mask_paths: optional iterable of mask paths, loaded as 'L'; takes
            precedence over path_img_2_path_mask
        path_img_2_path_mask: optional callable mapping an image path to the
            path of its mask; used only when mask_paths is None
        grid_path, downsample, inv_mask, resize_mode, grid_layout,
        auto_pad_if_not_same_size: forwarded unchanged to imgs_2_grid_A
    return:
        whatever imgs_2_grid_A returns (the grid image)
    """
    import PIL.Image

    def _load(path, mode):
        # Decode inside `with` so the file handle is released immediately;
        # convert() hands back a separate image that outlives the file.
        with PIL.Image.open(path) as handle:
            return handle.convert(mode)

    # Materialise once: `paths` may be walked twice (images, then masks)
    paths = list(paths)

    # Load images from paths
    imgs = [_load(path, 'RGB') for path in paths]

    # Load masks if provided
    masks = None
    if mask_paths is not None:
        masks = [_load(mask_path, 'L') for mask_path in mask_paths]
    elif path_img_2_path_mask is not None:
        masks = [_load(path_img_2_path_mask(path), 'L') for path in paths]

    return imgs_2_grid_A(
        imgs=imgs,
        masks=masks,
        grid_path=grid_path,
        downsample=downsample,
        inv_mask=inv_mask,
        resize_mode=resize_mode,
        grid_layout=grid_layout,
        auto_pad_if_not_same_size=auto_pad_if_not_same_size,
    )



def save_any_A(
    a,
    path=None, # only valid when !dont_save
    dont_save = False,
    # log
    print_info :bool = True,
    value_range: tuple = None,  # (min, max) tuple to specify value range, if None then auto determine
):
    """
    Turn an array-like image into a two-tile preview grid and (by default) save it.

    can auto determine or specify by param:
        data shape mode:
            ...,1/3/4,h,w ; ...,h,w,1/3/4 ;  
        value range:
            0-1 ; -1~1 ; 0-255

    after scaling to 0-255, save a grid containing two images:
        scaled image
        contrast-adjusted scaled image via linear transform so min=0 and max=255

    param:
        a: anything accepted by to__image_in_npArr; a copy is processed.
            Arrays with more than 3 dims are reduced by repeatedly taking
            index 0 of the leading axis.
        path: output file; when None (and dont_save is False) a timestamped
            file under /tmp/scy_auto_save is used
        dont_save: build and return the grid without writing a file
        print_info: print a summary of `a` via the project helper
            custom_repr_v3 (its exact output is defined in .torch_util)
        value_range: explicit (min, max) of the data; None -> guessed from
            dtype / min / max / mean (see the branch below)
    return:
        the grid returned by imgs_2_grid_A
    """
    a:np.ndarray = to__image_in_npArr(a)
    a = a.copy()
    if print_info:
        import torch;  from .torch_util import custom_repr_v3
        print(custom_repr_v3(torch.Tensor(a)))
    # drop leading (batch-like) axes by keeping only their first entry
    while(a.ndim>3):
        a=a[0]
    #-----------now a is chw | hwc --------------------------------------------------------
    if a.ndim > 2:
        if a.shape[-3] <= 4:
            if a.shape[-3] <= a.shape[-1] and a.shape[-3] <= a.shape[-2]:
                # assume the -3 axis is the channel dimension; convert chw -> hwc
                a = a.transpose(1, 2, 0)  # chw -> hwc
    else: # ndim==2
        # NOTE(review): this branch is also taken for ndim < 2 inputs
        a = np.expand_dims(a, axis=-1)  # hw -> hwc
    #-----------now a is hwc --------------------------------------------------------
    if value_range is None: # Auto determine
        mean = np.mean(a)
        std = np.std(a)  # computed but not used below
        min_ = np.min(a)
        max_ = np.max(a)
        # The first matching rule wins, so the order of these checks matters
        if a.dtype == np.uint8 or a.dtype == np.int32 or a.dtype == np.int64:
            range_ = (0, 255)
        elif a.dtype == bool:
            range_ = (0, 1)
        elif max_ > 100:
            range_ = (0, 255)
        elif mean > 1:
            range_ = (0, 255)
        elif min_ <= -1 or mean < 0 : # treat as range -1 to 1
            range_ = (-1, 1)
        else: # treat as range 0 to 1
            range_ = (0, 1)
        # printed regardless of print_info
        print(f"Auto determined {range_=}")
    else:
        range_ = value_range
    range_min, range_max = range_
    if a.dtype == bool:
        # bool arrays are always mapped to 0/255, whatever range_ says
        a = a.astype(np.uint8) * 255  # bool -> 0/255
    else:
        if range_min == 0 and range_max == 255:
            pass
        else:
            # Custom range, normalize to 0~255
            a = (a - range_min) / (range_max - range_min) * 255
    #-----------now a is hwc and scaled to 0~255 --------------------------------------------------------
    if a.shape[-1] == 1:
        # replicate a single channel three times
        a = np.repeat(a, 3, axis=-1)
    #-----------now a is hwc, 0~255, and channels==3/4 --------------------------------------------------------
    
    if 1:  # create contrast-adjusted version by linearly mapping min to 0 and max to 255
        a_contrast = a.copy().astype(np.float32)
        current_min = np.min(a_contrast)
        current_max = np.max(a_contrast)
        if current_max > current_min:  # avoid division by zero
            a_contrast = (a_contrast - current_min) / (current_max - current_min) * 255
        # out-of-range values are clipped before the uint8 cast
        a = np.clip(a, 0, 255).astype(np.uint8)
        a_contrast = np.clip(a_contrast, 0, 255).astype(np.uint8)
    if dont_save:
        # grid_path=None makes imgs_2_grid_A skip writing a file
        path = None
    else:
        if path is None:
            save_dir = Path("/tmp/scy_auto_save")
            save_dir.mkdir(exist_ok=True)
            import time
            timestamp = int(time.time() * 1000)  # milliseconds for uniqueness
            ext = "jpg" if a.shape[-1] <= 3 else "png" # Use jpg by default if num channels <= 3
            path = save_dir / f"auto_{timestamp}.{ext}"
        else:
            path = Path(path)
            # creates only the direct parent; missing grandparents raise
            path.parent.mkdir(exist_ok=True)
        path = str(path)
    grid = imgs_2_grid_A( # create grid with 2 images: original scaled + contrast adjusted
        imgs=[a, a_contrast],
        grid_path=path,
        grid_layout="row",
        verbose = -1,
    )
    if not dont_save: print(f"{path}")
    return grid