from __future__ import annotations

from collections.abc import Iterable

import torch
import numpy as np
import supervision as sv
from pycocotools import mask as mask_utils
import cv2
import ffmpeg
from PIL import Image
from matplotlib import pyplot as plt
|
|
class SAM2Tracker:
    """Thin wrapper around a SAM2 streaming/camera predictor for multi-object tracking."""

    def __init__(self, predictor) -> None:
        self.predictor = predictor
        self._prompted = False

    def prompt_first_frame(self, frame: np.ndarray, detections: sv.Detections) -> None:
        """Seed the predictor with one box prompt per detection on the first frame."""
        if len(detections) == 0:
            raise ValueError("detections must contain at least one box")

        # Assign sequential tracker ids if the detector did not provide any.
        if detections.tracker_id is None:
            detections.tracker_id = np.arange(1, len(detections) + 1)

        with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
            self.predictor.load_first_frame(frame)
            for xyxy, obj_id in zip(detections.xyxy, detections.tracker_id):
                bbox = np.asarray([xyxy], dtype=np.float32)
                self.predictor.add_new_prompt(
                    frame_idx=0,
                    obj_id=int(obj_id),
                    bbox=bbox,
                )

        self._prompted = True
|
    def propagate(self, frame: np.ndarray) -> sv.Detections:
        """Track the prompted objects into the given frame and return mask detections."""
        if not self._prompted:
            raise RuntimeError("Call prompt_first_frame before propagate")

        with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
            tracker_ids, mask_logits = self.predictor.track(frame)

        tracker_ids = np.asarray(tracker_ids, dtype=np.int32)
        masks = (mask_logits > 0.0).cpu().numpy()
        masks = np.squeeze(masks).astype(bool)

        # A single tracked object squeezes down to (H, W); restore the leading axis.
        if masks.ndim == 2:
            masks = masks[None, ...]

        # Clean up each mask with supervision's distance-based segment filter.
        masks = np.array([
            sv.filter_segments_by_distance(mask, relative_distance=0.03, mode="edge")
            for mask in masks
        ])

        xyxy = sv.mask_to_xyxy(masks=masks)
        detections = sv.Detections(xyxy=xyxy, mask=masks, tracker_id=tracker_ids)
        return detections
|
|
    def reset(self) -> None:
        self._prompted = False
|
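# Illustrative sketch (not part of the original pipeline) of how SAM2Tracker is
# meant to be driven frame by frame. `predictor` is assumed to be a SAM2
# streaming/camera predictor exposing load_first_frame / add_new_prompt / track,
# and `first_frame_detections` an sv.Detections with one box per object to follow.
def example_track_clip(predictor, frames: np.ndarray, first_frame_detections: sv.Detections) -> list[sv.Detections]:
    tracker = SAM2Tracker(predictor)
    tracker.prompt_first_frame(frames[0], first_frame_detections)

    per_frame = [first_frame_detections]
    for frame in frames[1:]:
        per_frame.append(tracker.propagate(frame))
    return per_frame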
|
def get_crops_from_masks(frame: np.ndarray, masks: np.ndarray) -> list[np.ndarray]:
    """
    Args:
        frame: (H, W, 3) image
        masks: (N, H, W) binary masks

    Returns:
        List of cropped images, one per mask. Each crop is a rectangular
        bounding box around the mask, with black pixels outside the mask.
    """
    crops = []

    for mask in masks:
        ys, xs = np.where(mask)
        if len(xs) == 0 or len(ys) == 0:
            # Empty mask: keep list alignment with an empty crop.
            crops.append(np.zeros((0, 0, 3), dtype=frame.dtype))
            continue

        y_min, y_max = ys.min(), ys.max() + 1
        x_min, x_max = xs.min(), xs.max() + 1

        frame_crop = frame[y_min:y_max, x_min:x_max]
        mask_crop = mask[y_min:y_max, x_min:x_max]

        # Copy only the masked pixels; everything outside the mask stays black.
        crop = np.zeros_like(frame_crop)
        crop[mask_crop] = frame_crop[mask_crop]

        crops.append(crop)

    return crops
|
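# Minimal sanity-check sketch for get_crops_from_masks on synthetic data;
# the shapes and values here are illustrative only.
def example_crops_demo() -> None:
    frame = np.full((64, 64, 3), 255, dtype=np.uint8)       # white frame
    mask = np.zeros((64, 64), dtype=bool)
    mask[10:20, 30:50] = True                                # one rectangular object
    crops = get_crops_from_masks(frame, mask[None, ...])
    assert crops[0].shape == (10, 20, 3)                     # tight box around the mask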
|
def append_masks_to_track_history(detections: sv.Detections, track_history: dict, frame_index: int) -> None:
    """Store each detection's mask as a COCO RLE payload keyed by tracker id.

    Only the RLE 'counts' payload is kept, so the mask height/width must be
    known elsewhere to decode it later.
    """
    for i in range(len(detections)):
        mask = detections.mask[i]
        # pycocotools expects a Fortran-ordered uint8 array.
        rle = mask_utils.encode(np.asfortranarray(mask.astype(np.uint8)))
        tracker_id = int(detections.tracker_id[i])
        track_history.setdefault(tracker_id, []).append((frame_index, rle['counts']))
|
|
|
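# Sketch of the reverse direction: rebuilding a binary mask from a stored
# 'counts' payload. The frame height/width are assumptions here; they are not
# stored in track_history and must come from the video itself.
def example_decode_track_entry(counts: bytes, height: int, width: int) -> np.ndarray:
    rle = {"size": [height, width], "counts": counts}
    return mask_utils.decode(rle).astype(bool)               # (H, W) boolean mask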
|
def toRGB(img: np.ndarray):
    return cv2.cvtColor(img, code=cv2.COLOR_BGR2RGB)
|
|
def read_frame_from_video(in_filename, frame_num):
    """Decode a single RGB frame (returned with a leading batch axis). Assumes a 1920x1080 source."""
    raw_bytes, err = (
        ffmpeg
        .input(in_filename)
        .filter('select', 'gte(n,{})'.format(frame_num))
        .output('pipe:', vframes=1, format='rawvideo', pix_fmt='rgb24')
        .global_args('-loglevel', 'error')
        .run(capture_stdout=True)
    )
    assert len(raw_bytes) == 1080 * 1920 * 3, "expected a single 1080p RGB frame"
    return np.frombuffer(raw_bytes, np.uint8).reshape(1, 1080, 1920, 3).copy()
|
|
def read_consecutive_frames_from_video(in_filename, start_frame, num_frames) -> np.ndarray:
    """Decode num_frames consecutive RGB frames starting at start_frame. Assumes a 1920x1080 source."""
    out, err = (
        ffmpeg
        .input(in_filename)
        .output(
            'pipe:1',
            vf=f'select=between(n\\,{start_frame}\\,{start_frame + num_frames - 1})',
            vsync=0,
            vframes=num_frames,
            format='rawvideo',
            pix_fmt='rgb24'
        )
        .global_args('-loglevel', 'error')
        .run(capture_stdout=True, capture_stderr=True)
    )

    W, H = 1920, 1080
    frame_size = W * H * 3
    frames = np.frombuffer(out, np.uint8)

    if frames.size != num_frames * frame_size:
        raise RuntimeError(
            f'Expected {num_frames * frame_size} bytes, got {frames.size}\n'
            f'ffmpeg stderr:\n{err.decode()}'
        )

    return frames.reshape(num_frames, H, W, 3).copy()
|
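# Both readers above hard-code a 1920x1080 source. A sketch of how the size
# could be probed instead (ffmpeg.probe is part of ffmpeg-python; the stream
# layout shown is the usual one, but treat it as an assumption, not a contract):
def probe_video_size(in_filename) -> tuple[int, int]:
    info = ffmpeg.probe(in_filename)
    video = next(s for s in info['streams'] if s['codec_type'] == 'video')
    return int(video['width']), int(video['height'])         # (W, H)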
|
def xywhn_to_xywh(xywhn: list, height: int, width: int):
    """Scale normalised [x, y, w, h] coordinates to pixel units."""
    x, y, w, h = xywhn
    return [int(x * width), int(y * height), int(w * width), int(h * height)]
|
|
def crop_frame_at_mask_from_bbox(frame: np.ndarray, mask: np.ndarray, bbox: list) -> np.ndarray:
    """Crop frame to bbox (x, y, w, h) and black out pixels outside the mask."""
    x, y, w, h = bbox
    # Copy so that blacking out pixels does not mutate the caller's frame.
    crop = frame[y: y + h, x: x + w].copy()
    cropped_mask = mask[y: y + h, x: x + w]
    crop[~cropped_mask] = np.array([0, 0, 0], dtype=np.uint8)

    return crop
|
|
def find_consecutive_streaks(nums: Iterable[int]) -> list[range]:
    """Split a sorted iterable of ints into ranges of consecutive values."""
    nums = list(nums)
    if not nums:
        return []

    streaks = []
    start = nums[0]
    for i in range(1, len(nums)):
        if nums[i] != nums[i - 1] + 1:
            stop = nums[i - 1]
            streaks.append(range(start, stop + 1))
            start = nums[i]

    streaks.append(range(start, nums[-1] + 1))
    return streaks
|
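# Small illustrative check of the streak splitting above; the input values are arbitrary.
def example_streaks_demo() -> None:
    streaks = find_consecutive_streaks([3, 4, 5, 9, 10, 14])
    assert streaks == [range(3, 6), range(9, 11), range(14, 15)]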
|
def save_loss_history(fpath, loss: float) -> None:
    """Append a single loss value to a plain-text history file."""
    with open(fpath, "a") as f:
        f.write(f"{loss:.6f}\n")
|
|
def save_loss_history_plot(loss_history: list[float], fpath) -> None:
    """Plot the loss curve and write it to fpath."""
    fig = plt.figure()
    plt.plot(loss_history)
    plt.savefig(fpath)
    plt.close(fig)
|
|
def save_checkpoint(path, model, optimizer, epoch, step):
    ckpt = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch,
        "step": step,
    }
    torch.save(ckpt, path)
|
|
def load_checkpoint(path, model, optimizer, device="cuda"):
    ckpt = torch.load(path, map_location=device)

    model.load_state_dict(ckpt["model"])
    optimizer.load_state_dict(ckpt["optimizer"])

    epoch = ckpt.get("epoch", 0)
    step = ckpt.get("step", 0)

    return epoch, step
|
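# Sketch of how the two checkpoint helpers pair up when resuming a run;
# `model`, `optimizer` and `ckpt_path` are placeholders for the real training objects.
def example_resume(model, optimizer, ckpt_path: str, device: str = "cpu") -> tuple[int, int]:
    start_epoch, start_step = load_checkpoint(ckpt_path, model, optimizer, device=device)
    # ...continue the training loop from (start_epoch, start_step) and call
    # save_checkpoint(ckpt_path, model, optimizer, epoch, step) periodically.
    return start_epoch, start_step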
|
def mask_iou_pair(m1, m2):
    """IoU between two binary masks of the same shape."""
    inter = np.logical_and(m1, m2).sum()
    if inter == 0:
        return 0.0
    union = m1.sum() + m2.sum() - inter
    return inter / (union + 1e-6)
|
|
|
|
def mask_nms(masks, scores, iou_thresh=0.6):
    """Greedy non-maximum suppression over binary masks; returns the kept indices."""
    order = np.argsort(-scores)
    keep = []
    suppressed = np.zeros(len(masks), dtype=bool)

    for rank, i in enumerate(order):
        if suppressed[i]:
            continue

        keep.append(i)

        # Only lower-scored masks (later in `order`) can be suppressed by mask i.
        for j in order[rank + 1:]:
            if suppressed[j]:
                continue

            iou = mask_iou_pair(masks[i], masks[j])
            if iou > iou_thresh:
                suppressed[j] = True

    return keep
|
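# Tiny illustrative check of mask_nms: two heavily overlapping masks plus one
# disjoint mask should collapse to two survivors. Values are arbitrary.
def example_mask_nms_demo() -> None:
    a = np.zeros((32, 32), dtype=bool); a[0:10, 0:10] = True
    b = np.zeros((32, 32), dtype=bool); b[0:10, 1:11] = True    # ~0.82 IoU with a
    c = np.zeros((32, 32), dtype=bool); c[20:30, 20:30] = True  # no overlap
    keep = mask_nms(np.stack([a, b, c]), np.array([0.9, 0.8, 0.7]), iou_thresh=0.6)
    assert sorted(int(k) for k in keep) == [0, 2]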
|
def mask_iou(masks_t: np.ndarray, masks_t1: np.ndarray) -> np.ndarray:
    """Pairwise IoU between two stacks of binary masks, returned as an (N, M) matrix."""
    N = masks_t.shape[0]
    M = masks_t1.shape[0]

    masks_t = masks_t.reshape(N, -1).astype(float)
    masks_t1 = masks_t1.reshape(M, -1).astype(float)

    # Intersection areas via a single matrix product, shape (N, M).
    intersection = masks_t @ masks_t1.T

    area_t = masks_t.sum(1, keepdims=True)      # (N, 1)
    area_t1 = masks_t1.sum(1, keepdims=True)    # (M, 1)

    union = area_t + area_t1.T - intersection

    iou = intersection / (union + 1e-6)
    return iou
|
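# Sketch of how the pairwise IoU matrix can be turned into track assignments
# between frame t and frame t+1. scipy is an extra dependency assumed here;
# a greedy argmax over the IoU matrix would also work for easy scenes.
def example_match_masks(masks_t: np.ndarray, masks_t1: np.ndarray, iou_thresh: float = 0.3):
    from scipy.optimize import linear_sum_assignment

    iou = mask_iou(masks_t, masks_t1)
    rows, cols = linear_sum_assignment(-iou)                  # maximise total IoU
    return [(int(r), int(c)) for r, c in zip(rows, cols) if iou[r, c] >= iou_thresh]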
|
# Court landmark coordinates in feet on a 94 ft x 50 ft court
# (x runs baseline to baseline, y runs sideline to sideline).
COURT_KEYPOINT_COORDINATES = np.array([
    (0.0, 0.0),
    (0.0, 2.99),
    (0.0, 17.0),
    (0.0, 33.01),
    (0.0, 47.02),
    (0.0, 50.0),
    (5.25, 25.0),
    (13.92, 2.99),
    (13.92, 47.02),
    (19.0, 17.0),
    (19.0, 25.0),
    (19.0, 33.01),
    (27.4, 0.0),
    (29.01, 25.0),
    (27.4, 50.0),
    (46.99, 0.0),
    (46.99, 25.0),
    (46.99, 50.0),
    (66.61, 0.0),
    (65.0, 25.0),
    (66.61, 50.0),
    (75.0, 17.0),
    (75.0, 25.0),
    (75.0, 33.01),
    (80.09, 2.99),
    (80.09, 47.02),
    (88.75, 25.0),
    (94.0, 0.0),
    (94.0, 2.99),
    (94.0, 17.0),
    (94.0, 33.01),
    (94.0, 47.02),
    (94.0, 50.0)
])
|
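# Sketch of the intended use of the court keypoints: given image-space detections
# of (a subset of) these landmarks, estimate a homography from image pixels to
# court coordinates. `image_points` / `keypoint_indices` are placeholders for the
# output of whatever court keypoint detector runs upstream.
def example_court_homography(image_points: np.ndarray, keypoint_indices: np.ndarray) -> np.ndarray:
    court_points = COURT_KEYPOINT_COORDINATES[keypoint_indices]
    H, _ = cv2.findHomography(
        image_points.astype(np.float32),
        court_points.astype(np.float32),
        method=cv2.RANSAC,
    )
    return H    # 3x3 matrix mapping image pixels to court feet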
|
def get_distance_cost_matrix(arr1: np.ndarray, arr2: np.ndarray, ord=1) -> torch.Tensor:
    """Pairwise distance matrix between two point sets, returned as a torch tensor."""
    cost_matrix = np.empty(shape=(len(arr1), len(arr2)), dtype=np.float64)

    for i in range(len(arr1)):
        cost_matrix[i] = np.linalg.norm(arr1[i] - arr2, ord=ord, axis=-1)

    return torch.tensor(cost_matrix)
|
|
def matcher_probs_custom_argmax(probs: np.ndarray, confidence_threshold=0.7):
    """Argmax with a fallback: if the last class (treated as "no match") wins with
    low confidence, fall back to the best real class when it is plausible enough."""
    probs = probs.squeeze(0)
    pred = probs.argmax()

    if pred == len(probs) - 1 and probs[pred] < confidence_threshold:
        second_best = probs[:-1].argmax()
        if probs[second_best] > 1.0 - confidence_threshold - 0.05:
            pred = second_best

    return pred
|
|
def annotate_frame(frame_, detections_):
    annotated_frame = frame_.copy()
    annotated_frame = sv.MaskAnnotator(color_lookup=sv.ColorLookup.TRACK).annotate(annotated_frame, detections_)
    annotated_frame = sv.LabelAnnotator(smart_position=True).annotate(
        annotated_frame, detections_, labels=[str(i) for i in detections_.tracker_id]
    )
    return annotated_frame


def show_annotations(frame_, detections_):
    # Same annotation as annotate_frame, wrapped in a PIL image for display.
    return Image.fromarray(annotate_frame(frame_, detections_))
|
|
if __name__ == "__main__":
    from code import interact

    frames = read_consecutive_frames_from_video("nba_sample_videos/batch2/SAC_LAL_1.mp4", 199, 1)

    interact(local=locals())