| | import torch |
| | import numpy as np |
| | import torch.nn.functional as F |
| | from pycocotools import mask as mask_utils |
| |
|
| |
|
def grounding_image_ecoder_preprocess(x, pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
                                      pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
                                      img_size=1024) -> torch.Tensor:
    """Normalize pixel values and pad to a square input.

    Args:
        x: Image tensor whose last two dimensions are (height, width).
            With the default statistics (shaped ``(3, 1, 1)``) the channel
            dimension must broadcast against 3 channels.
        pixel_mean: Per-channel mean subtracted from ``x``.
        pixel_std: Per-channel standard deviation ``x`` is divided by.
        img_size: Side length of the padded output.

    Returns:
        The normalized tensor, zero-padded on the right and bottom so that
        its last two dimensions are ``(img_size, img_size)``.

    Note:
        The pad amounts are ``img_size - h`` and ``img_size - w``; inputs are
        presumably never larger than ``img_size`` (TODO confirm with callers).
    """
    # The default statistics are built on the CPU at import time. Move tensor
    # statistics to the input's device so GPU inputs do not fail with a
    # device-mismatch error. Non-tensor values (e.g. floats) pass through.
    if isinstance(pixel_mean, torch.Tensor):
        pixel_mean = pixel_mean.to(x.device)
    if isinstance(pixel_std, torch.Tensor):
        pixel_std = pixel_std.to(x.device)

    # Normalize colors.
    x = (x - pixel_mean) / pixel_std

    # Pad only on the right (width) and bottom (height) edges.
    h, w = x.shape[-2:]
    padh = img_size - h
    padw = img_size - w
    x = F.pad(x, (0, padw, 0, padh))

    return x
| |
|
| |
|
def mask_to_rle_pytorch(tensor: torch.Tensor):
    """
    Convert a batch of masks into uncompressed run-length encodings.

    ``tensor`` is unpacked as (batch, height, width). Each mask is flattened
    in column-major order and described by a dict with its ``"size"``
    (``[height, width]``) and its ``"counts"`` (alternating run lengths,
    always starting with the run of zeros, which may be empty).
    """
    num_masks, height, width = tensor.shape
    num_pixels = height * width

    # Swap the spatial axes before flattening so pixels are read column by column.
    flat = tensor.permute(0, 2, 1).flatten(1)

    # A set bit marks a position whose successor holds a different value.
    transitions = (flat[:, 1:] ^ flat[:, :-1]).nonzero()
    owner, position = transitions[:, 0], transitions[:, 1]

    rles = []
    for mask_id in range(num_masks):
        run_starts = position[owner == mask_id] + 1
        boundaries = torch.cat(
            [run_starts.new_tensor([0]), run_starts, run_starts.new_tensor([num_pixels])]
        )
        run_lengths = (boundaries[1:] - boundaries[:-1]).detach().cpu().tolist()

        # The encoding starts with zeros; emit an empty zero-run when the
        # first pixel is set.
        if flat[mask_id, 0] == 0:
            counts = run_lengths
        else:
            counts = [0] + run_lengths
        rles.append({"size": [height, width], "counts": counts})

    return rles
| |
|
| |
|
def mask_to_rle_numpy(mask: np.ndarray):
    """
    Convert a single 2-D mask into an uncompressed run-length encoding.

    The mask is flattened in column-major order. The returned dict holds the
    mask ``"size"`` (``[height, width]``) and ``"counts"``: alternating run
    lengths that always begin with the run of zeros (possibly of length 0).
    """
    height, width = mask.shape

    # Transpose first so flattening walks the mask column by column.
    column_major = mask.T.flatten()

    # Indices whose next element holds a different value.
    flips = np.flatnonzero(column_major[1:] ^ column_major[:-1])

    # Run boundaries: start of data, each position after a flip, end of data.
    boundaries = np.concatenate(([0], flips + 1, [height * width]))
    run_lengths = (boundaries[1:] - boundaries[:-1]).tolist()

    # The encoding starts with zeros; emit an empty zero-run when the first
    # pixel is set.
    if column_major[0] == 0:
        counts = run_lengths
    else:
        counts = [0] + run_lengths

    return {"size": [height, width], "counts": counts}
| |
|
| |
|
def coco_encode_rle(uncompressed_rle):
    """
    Compress an uncompressed RLE dict with pycocotools.

    Reads the mask dimensions from ``uncompressed_rle["size"]``, passes the
    dict to ``mask_utils.frPyObjects`` and replaces the resulting ``"counts"``
    value with its UTF-8 decoded string (presumably so the result can be
    serialized as JSON; confirm against callers).
    """
    height, width = uncompressed_rle["size"]
    compressed = mask_utils.frPyObjects(uncompressed_rle, height, width)

    encoded_counts = compressed["counts"]
    compressed["counts"] = encoded_counts.decode("utf-8")

    return compressed
| |
|
| |
|
def compute_iou(mask1, mask2):
    """
    Compute the intersection-over-union of two masks.

    Both arguments are interpreted element-wise as booleans via
    ``np.logical_and`` / ``np.logical_or``, so they must be broadcastable
    against each other.

    Returns:
        The ratio of intersecting elements to elements in the union. When
        both masks are empty the union is empty and the ratio is undefined;
        0.0 is returned in that case instead of NaN (and the accompanying
        divide-by-zero RuntimeWarning).
    """
    union_area = np.logical_or(mask1, mask2).sum()
    if union_area == 0:
        # Guard against 0 / 0 for two empty masks.
        return 0.0

    intersection_area = np.logical_and(mask1, mask2).sum()
    return intersection_area / union_area
| |
|
| |
|
def bbox_to_x1y1x2y2(bbox):
    """
    Convert a box from ``(x1, y1, width, height)`` to ``[x1, y1, x2, y2]``.

    ``bbox`` must unpack into exactly four values; the result is a new list
    whose last two entries are the first corner offset by the width and
    height.
    """
    left, top, box_w, box_h = bbox
    return [left, top, left + box_w, top + box_h]
| |
|