Spaces:
Runtime error
Runtime error
| import tensorflow as tf | |
| import cv2 | |
| import numpy as np | |
| import config | |
| from logger import logging | |
def preprocess_video(video_path: str) -> tuple[tf.Tensor, list]:
    """
    Preprocess the video by sampling config.FRAME_NUM evenly spaced frames,
    resizing each frame to (config.FRAME_WD x config.FRAME_HT) and scaling
    pixel values to [0, 1].

    params :
        video_path : path of the video file
    returns :
        Returns tuple (input_tensor, frame_list)
        input_tensor : batched tensor of the resized, normalized frames
                       (leading batch dimension of size 1)
        frame_list : list of the original (unresized) sampled frames
    """
    logging.info(">>> Preprocessing the video....")
    # load the video
    video_capture = cv2.VideoCapture(video_path)
    # the number of frames in the original video
    original_number_of_frames = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
    # Gap between two consecutive frames to capture. Clamp to >= 1:
    # for videos shorter than FRAME_NUM frames the integer division is 0,
    # which would make every seek below land on frame 0.
    frame_interval = max(1, int(original_number_of_frames / config.FRAME_NUM))
    new_video, frame_list = [], []
    for i in range(config.FRAME_NUM):
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
        success, frame = video_capture.read()
        if not success:
            # best-effort: keep whatever frames were read so far
            logging.info("video loading failed")
            break
        frame_list.append(frame)
        # BUG FIX: cv2.resize takes dsize as (width, height) — the original
        # passed (FRAME_HT, FRAME_WD), swapping the two dimensions.
        resized_frame = cv2.resize(frame, (config.FRAME_WD, config.FRAME_HT))
        # Normalize so each pixel value lies in [0, 1].
        normalized_frame = resized_frame / 255
        new_video.append(normalized_frame)
    new_video_array = np.asarray(new_video)
    # Add the leading batch dimension the model expects.
    input_tensor = tf.expand_dims(new_video_array, axis=0)
    video_capture.release()
    logging.info("Video processing successful.")
    return input_tensor, frame_list
# Get top_k labels and probabilities
def get_top_k(probs, label_map, k=5):
    """Return the model's top-k labels and their probabilities.

    Args:
        probs: probability tensor with a leading batch dimension of 1
            (only row 0 is used) giving the probability of each class.
        label_map: a list of labels to map logit indices to label strings.
        k: the number of top predictions to select.

    Returns:
        dict mapping each top-k label (str) to its probability (float)
        divided by 100.  (Docstring fixed: the function returns a dict,
        not a tuple.)
    """
    # Sort predictions to find the indices of the top_k classes.
    indices = tf.argsort(probs, direction='DESCENDING').numpy()[0][:k]
    # Collect the labels of the top_k predictions.
    labels = tf.gather(label_map, indices).numpy()
    # Decode labels from bytes to str.
    labels = [label.decode('utf8') for label in labels]
    # Probabilities of the top_k predictions.
    top_probs = tf.gather(probs[0], indices).numpy()
    # NOTE(review): dividing by 100 assumes `probs` are percentages, which
    # contradicts the "probability tensor" description — confirm against the
    # model's output scale.
    return {label: float(prob) / 100 for label, prob in zip(labels, top_probs)}