Spaces:
Runtime error
Runtime error
| import cv2 | |
| import numpy as np | |
| import yolov5 | |
class CropVideo:
    """Base class for cropping a video frame-by-frame with an object
    detection method such as YOLO or cv2.Tracker.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters:
        method : name of the object detection method
    """

    def __init__(self, method=None):
        self.method = method

    def video_crop(self, video_frames):
        """Crops given list of frames by detecting object using different
        methods such as YOLO or cv2.Tracker.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames

        Raises:
            NotImplementedError: always; derived classes must override
                this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement video_crop()"
        )
class YOLOCrop(CropVideo):
    """Class for cropping a video frame-by-frame using YOLO object detection
    method.

    Parameters:
        method : accepted for backward compatibility and ignored; the
            method name is always set to 'yolo'
        model_path : path to object detection model; defaults to
            'models/yolo/yolov5x.pt' when not given
    """

    def __init__(self, method=None, model_path=None):
        super().__init__('yolo')
        self.model_path = model_path or 'models/yolo/yolov5x.pt'
        self.load_model(self.model_path)

    def load_model(self, model_path):
        """Loads object detection model.

        Args:
            model_path: path to the YOLOv5 weights file
        """
        self.model = yolov5.load(model_path)
        # Restrict detections to class index 0 only.
        # NOTE(review): presumably "person" for COCO-trained weights --
        # confirm against the checkpoint actually shipped.
        self.model.classes = 0

    def get_yolo_bbox(self, frame):
        """Runs YOLO object detection on an input image.

        Args:
            frame: A [height, width, 3] numpy array representing the input
                image

        Returns:
            A list containing bounding box parameters
            [x_min, y_min, x_max, y_max] of the largest detected box, or an
            empty list when nothing is detected.
        """
        results = self.model(frame)
        predictions = results.pred[0]
        # Move to host memory first so this also works when the model runs
        # on a GPU.
        boxes = predictions[:, :4].detach().cpu().numpy().astype(np.int32)
        if len(boxes) == 0:
            return []
        # Area is width * height. (A two-point "contour" has zero area, so
        # it cannot be used to rank boxes.)
        box_widths = (boxes[:, 2] - boxes[:, 0]).astype(np.int64)
        box_heights = (boxes[:, 3] - boxes[:, 1]).astype(np.int64)
        largest = int(np.argmax(box_widths * box_heights))
        # tolist() yields plain Python ints.
        return boxes[largest].tolist()

    def video_crop(self, video_frames):
        """Crops given list of frames by detecting object using YOLO.

        Frames in which nothing is detected are dropped from the output.
        All crops are squares whose side is the 95th percentile of the
        padded detection widths/heights (limited by the frame size).

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames; an empty array when no
            frame produced a detection.
        """
        margin = 100  # pixels added around the detected box on every side

        # Keep each detection together with the frame it came from so that
        # skipped frames cannot shift the pairing.
        detections = []
        for frame in video_frames:
            bbox = self.get_yolo_bbox(frame)
            if len(bbox) == 0:
                continue
            frame_height, frame_width = frame.shape[:2]
            x_start = int(max(bbox[0] - margin, 0))
            y_start = int(max(bbox[1] - margin, 0))
            x_end = int(min(bbox[2] + margin, frame_width))
            y_end = int(min(bbox[3] + margin, frame_height))
            detections.append((frame, x_start, y_start, x_end, y_end))

        if not detections:
            return np.array([])

        widths = np.array([x_end - x_start
                           for _, x_start, _, x_end, _ in detections])
        heights = np.array([y_end - y_start
                            for _, _, y_start, _, y_end in detections])
        box_len = int(max(np.percentile(widths, 95),
                          np.percentile(heights, 95)))

        cropped_frames = []
        for frame, x_start, y_start, _, _ in detections:
            frame_height, frame_width = frame.shape[:2]
            xs, xe = x_start, x_start + box_len
            ys, ye = y_start, y_start + box_len
            # Slide the window back inside the frame when it overflows.
            if ye > frame_height:
                ye = frame_height
                ys = max(0, ye - box_len)
            if xe > frame_width:
                xe = frame_width
                xs = max(0, xe - box_len)
            cropped_frames.append(np.array(frame[ys:ye, xs:xe, :]))
        return np.array(cropped_frames)
class TrackerCrop(YOLOCrop):
    """Class for cropping a video frame-by-frame using a cv2 MIL tracker
    that is seeded with a YOLO detection on the first frame.

    Parameters:
        model_path : path to object detection model (forwarded to YOLOCrop)
    """

    def __init__(self, model_path=None):
        # Forward model_path so a caller-supplied model is actually used.
        super().__init__(method='yolo', model_path=model_path)
        self.tracker = cv2.TrackerMIL.create()

    @staticmethod
    def expand_bbox(bbox, frame_shape):
        """Expands given bounding box by 50 pixels, in place.

        Args:
            bbox: A list [x, y, width, height] consisting of the bounding
                box parameters of the object; modified in place
            frame_shape: (height, width, ...) of a frame
        """
        # NOTE(review): x/y move 50 px up-left and width/height grow by
        # 50 px, so the right/bottom edges stay where they were -- confirm
        # whether a symmetric expansion was intended.
        bbox[0] = max(bbox[0] - 50, 0)
        bbox[1] = max(bbox[1] - 50, 0)
        bbox[2] = min(bbox[2] + 50, frame_shape[1] - bbox[0] - 1)
        bbox[3] = min(bbox[3] + 50, frame_shape[0] - bbox[1] - 1)

    @staticmethod
    def pad_bbox(crop_frame, box_len):
        """Pads given cropped frame to a box_len x box_len square.

        Args:
            crop_frame: A numpy array representing the cropped frame
            box_len: An integer value representing maximum out of width and
                height

        Returns:
            A numpy array containing cropped frame with zero padding added
            at the bottom/right when it is smaller than box_len
        """
        if box_len > crop_frame.shape[0] or box_len > crop_frame.shape[1]:
            # max(0, ...) keeps the pad width valid when only one dimension
            # is short.
            crop_frame = np.pad(
                crop_frame, pad_width=(
                    (0, max(0, box_len - crop_frame.shape[0])),
                    (0, max(0, box_len - crop_frame.shape[1])),
                    (0, 0))
            )
        return crop_frame

    @staticmethod
    def clip_coordinates(x, y, box_len, frame_shape):
        """Clips (x, y) coordinates of the bounding box origin so that a
        box_len-sized square starting there stays inside the frame when
        possible.

        Args:
            x: x-coordinate of the bounding box origin
            y: y-coordinate of the bounding box origin
            box_len: An integer value representing maximum out of width and
                height
            frame_shape: (height, width, ...) of a frame

        Returns:
            (x, y) clipped coordinates, never negative
        """
        x = max(0, min(x, frame_shape[1] - box_len))
        y = max(0, min(y, frame_shape[0] - box_len))
        return (x, y)

    def video_crop(self, video_frames):
        """Crops given list of frames by detecting object using cv2.Tracker.

        The tracker is initialised from the YOLO detection on the first
        frame. When the tracker reports a failure on a frame, the last
        successfully tracked box is reused for that frame.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames; an empty array when
            there are no frames or nothing is detected on the first frame.
        """
        if len(video_frames) == 0:
            return np.array([])

        frame = video_frames[0]
        detection = self.get_yolo_bbox(frame)
        if len(detection) == 0:
            return np.array([])

        # YOLO returns corner coordinates; the tracker and expand_bbox work
        # with [x, y, width, height].
        x_min, y_min, x_max, y_max = (int(v) for v in detection)
        bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
        TrackerCrop.expand_bbox(bbox, frame.shape)
        last_bbox = tuple(bbox)
        self.tracker.init(frame, last_bbox)

        output_frame_list = []
        for frame in video_frames:
            ok, tracked = self.tracker.update(frame)
            if ok:
                # Cast to int so the values are usable as slice indices.
                last_bbox = tuple(int(v) for v in tracked)
            x, y, w, h = last_bbox
            box_len = max(w, h)
            x, y = TrackerCrop.clip_coordinates(x, y, box_len, frame.shape)
            crop_frame = np.array(frame[y:y + box_len, x:x + box_len, :])
            crop_frame = TrackerCrop.pad_bbox(crop_frame, box_len)
            output_frame_list.append(crop_frame)
        return np.array(output_frame_list)