tuandunghcmut
/

vlm_clone_2

Model card Files Files and versions

vlm_clone_2 / groundingLMM /eval /utils.py

tuandunghcmut's picture

Add files using upload-large-folder tool

391089d verified 11 months ago

history blame contribute delete

2.74 kB

	import torch
	import numpy as np
	import torch.nn.functional as F
	from pycocotools import mask as mask_utils


	def grounding_image_ecoder_preprocess(x, pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
	                                      pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
	                                      img_size=1024) -> torch.Tensor:
	    """Normalize pixel values and zero-pad the image to a square input.

	    Args:
	        x: Image tensor whose last two dimensions are height and width.
	            With the default statistics (shaped ``(3, 1, 1)``) it must
	            broadcast against three channels, e.g. ``(3, H, W)``.
	        pixel_mean: Per-channel mean subtracted from ``x``. The defaults are
	            on a 0-255 scale (presumably ImageNet RGB statistics -- confirm
	            against the caller's pixel range).
	        pixel_std: Per-channel standard deviation ``x`` is divided by.
	        img_size: Target side length of the padded output.

	    Returns:
	        The normalized tensor, padded with zeros on the right and bottom so
	        that its last two dimensions are ``img_size`` x ``img_size``.

	    Note:
	        No size validation is done here: if ``x`` is larger than
	        ``img_size`` the (negative) pad amounts are handed to ``F.pad``
	        unchanged.
	    """
	    # The default statistics are created on the CPU at definition time; move
	    # them next to ``x`` so inputs on another device do not raise a
	    # device-mismatch error.
	    mean = pixel_mean.to(x.device)
	    std = pixel_std.to(x.device)

	    # Normalize colors
	    x = (x - mean) / std

	    # Pad on the right/bottom only, so the image stays anchored top-left.
	    h, w = x.shape[-2:]
	    padh = img_size - h
	    padw = img_size - w
	    return F.pad(x, (0, padw, 0, padh))


	def mask_to_rle_pytorch(tensor: torch.Tensor):
	    """
	    Convert a batch of masks into uncompressed run-length encodings, in the
	    format expected by pycoco tools.

	    ``tensor`` is unpacked as ``(batch, height, width)`` and must support
	    ``^`` (boolean or integer dtype). The result is one dict per mask with
	    keys ``"size"`` (``[height, width]``) and ``"counts"`` (run lengths over
	    the column-major flattened mask, starting with the run of zeros).
	    """
	    num_masks, height, width = tensor.shape
	    num_pixels = height * width

	    # Column-major (Fortran) order: swap h and w, then flatten them.
	    flat = tensor.permute(0, 2, 1).flatten(1)

	    # Rows of (mask index, position) where neighbouring pixels differ.
	    transitions = (flat[:, 1:] ^ flat[:, :-1]).nonzero()

	    rles = []
	    for mask_idx in range(num_masks):
	        positions = transitions[transitions[:, 0] == mask_idx, 1]
	        first = torch.tensor([0], dtype=positions.dtype, device=positions.device)
	        last = torch.tensor([num_pixels], dtype=positions.dtype, device=positions.device)

	        # Run boundaries: start of the mask, every transition, end of the mask.
	        boundaries = torch.cat([first, positions + 1, last])
	        run_lengths = boundaries[1:] - boundaries[:-1]

	        # Counts begin with the background run; when the mask opens with
	        # foreground that run has length zero.
	        counts = [0] if flat[mask_idx, 0] != 0 else []
	        counts += run_lengths.detach().cpu().tolist()
	        rles.append({"size": [height, width], "counts": counts})

	    return rles


	def mask_to_rle_numpy(mask: np.ndarray):
	    """
	    Convert a single 2-D mask into an uncompressed run-length encoding, in
	    the format expected by pycoco tools.

	    ``mask`` is unpacked as ``(height, width)`` and must support ``^``
	    (boolean or integer dtype). Returns a dict with keys ``"size"``
	    (``[height, width]``) and ``"counts"`` (run lengths over the
	    column-major flattened mask, starting with the run of zeros).
	    """
	    height, width = mask.shape

	    # Column-major (Fortran) order: transpose, then flatten.
	    flat = mask.T.flatten()

	    # Positions where a pixel differs from its successor.
	    flips = np.where(flat[1:] ^ flat[:-1])[0]

	    # Run boundaries: start of the mask, every transition, end of the mask.
	    boundaries = np.concatenate(([0], flips + 1, [height * width]))
	    run_lengths = np.diff(boundaries)

	    # Counts begin with the background run; when the mask opens with
	    # foreground that run has length zero.
	    counts = [0] if flat[0] != 0 else []
	    counts += run_lengths.tolist()

	    return {"size": [height, width], "counts": counts}


	def coco_encode_rle(uncompressed_rle):
	    """
	    Compress an uncompressed RLE dict with pycocotools.

	    ``uncompressed_rle`` must carry a ``"size"`` entry of ``[height, width]``;
	    it is passed to ``mask_utils.frPyObjects`` together with that size. The
	    returned RLE has its ``"counts"`` decoded to ``str``.
	    """
	    height, width = uncompressed_rle["size"]
	    encoded = mask_utils.frPyObjects(uncompressed_rle, height, width)

	    # Decode the counts to text: necessary to serialize with json.
	    encoded["counts"] = encoded["counts"].decode("utf-8")
	    return encoded


	def compute_iou(mask1, mask2):
	    """Return the intersection-over-union of two masks.

	    Args:
	        mask1: Array-like mask; non-zero entries count as foreground.
	        mask2: Array-like mask broadcastable against ``mask1``.

	    Returns:
	        ``|mask1 AND mask2| / |mask1 OR mask2|``. When both masks are empty
	        the union is zero and the ratio is undefined; ``0.0`` is returned in
	        that case instead of ``nan`` so that averaged metrics stay finite.
	    """
	    intersection = np.logical_and(mask1, mask2)
	    union = np.logical_or(mask1, mask2)

	    union_area = np.sum(union)
	    if union_area == 0:
	        # Both masks are empty: avoid the 0/0 division (nan + RuntimeWarning).
	        return 0.0

	    return np.sum(intersection) / union_area


	def bbox_to_x1y1x2y2(bbox):
	    """Convert an ``[x, y, width, height]`` box into ``[x1, y1, x2, y2]``.

	    ``bbox`` must unpack into exactly four values; a new list is returned.
	    """
	    left, top, width, height = bbox
	    right = left + width
	    bottom = top + height

	    return [left, top, right, bottom]