import json
import os
import shutil
from datetime import datetime
from timeit import default_timer as timer

# Non-stdlib imports keep their original relative order.
import pandas as pd
import numpy as np
from utools import load_relevant_data_subset, mark_pred
from utools import softmax
import mediapipe as mp
import cv2

# Only every N-th frame of the input video is processed and written out.
N = 3
# Equals the 468 + 21 + 33 + 21 rows emitted per processed frame below.
ROWS_PER_FRAME = 543

with open('sign_to_prediction_index_map_cn.json', 'r') as f:
    person_dict = json.load(f)

# Reverse lookup: value of the JSON map -> its key.
inverse_dict = {val: key for key, val in person_dict.items()}

# (row "type" label, attribute on the Holistic result, placeholder row count
# used when that body part was not detected). Order defines the row order.
_LANDMARK_PARTS = (
    ("face", "face_landmarks", 468),
    ("left_hand", "left_hand_landmarks", 21),
    ("pose", "pose_landmarks", 33),
    ("right_hand", "right_hand_landmarks", 21),
)


def _append_landmarks(columns, landmark_list, kind, missing_count, frame_number):
    """Append one row per landmark of a single body part to ``columns``.

    Args:
        columns: dict of lists with keys ``frame``, ``type``,
            ``landmark_index``, ``x``, ``y``, ``z``; mutated in place.
        landmark_list: MediaPipe landmark list for the body part, or ``None``
            when the part was not detected.
        kind: value written to the ``type`` column.
        missing_count: number of placeholder rows (``x``/``y``/``z`` set to
            ``None``) to emit when ``landmark_list`` is ``None``.
        frame_number: value written to the ``frame`` column.
    """
    if landmark_list is None:
        coords = [(None, None, None)] * missing_count
    else:
        coords = [(lm.x, lm.y, lm.z) for lm in landmark_list.landmark]

    # The index always comes from this enumeration, so placeholder rows are
    # numbered 0..missing_count-1 instead of reusing a stale loop variable.
    for landmark_index, (x, y, z) in enumerate(coords):
        columns["frame"].append(frame_number)
        columns["type"].append(kind)
        columns["landmark_index"].append(landmark_index)
        columns["x"].append(x)
        columns["y"].append(y)
        columns["z"].append(z)


def r_holistic(video_path):
    """Annotate a video with holistic landmarks and predict a label for it.

    Every ``N``-th frame of ``video_path`` is run through MediaPipe Holistic.
    The face contours and pose skeleton are drawn on the frame, which is then
    written to ``output_recorded_holistic.webm``. The face, left-hand, pose
    and right-hand landmarks of those frames are collected into a DataFrame,
    passed through ``load_relevant_data_subset`` and fed to three TFLite
    models via ``mark_pred``. The softmaxed ``'outputs'`` of the three models
    are summed and the arg-max is mapped back through ``inverse_dict``.

    This is a generator that yields exactly one item.

    Args:
        video_path: path (or any source accepted by ``cv2.VideoCapture``) of
            the input video.

    Yields:
        ``(output_video, label)``: the path of the annotated video, and the
        entry of ``inverse_dict`` for the winning index (``None`` if the
        index is not in the map).
    """
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_holistic = mp.solutions.holistic

    # Column-wise accumulator; key order is the DataFrame column order.
    columns = {
        "frame": [],
        "type": [],
        "landmark_index": [],
        "x": [],
        "y": [],
        "z": [],
    }

    cap = cv2.VideoCapture(video_path)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_size = (frame_width, frame_height)
    fourcc = cv2.VideoWriter_fourcc(*"VP80")
    output_video = "output_recorded_holistic.webm"
    # Only every N-th frame is written, so the output frame rate is scaled
    # down accordingly; clamp to 1 so a low/unknown source fps never yields 0.
    out = cv2.VideoWriter(output_video, fourcc, max(1, fps // N), frame_size)

    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5,
                                  min_tracking_confidence=0.5) as holistic:
            frame_number = 0
            while cap.isOpened():
                frame_number += 1
                ret, image = cap.read()
                if not ret:
                    break
                if frame_number % N != 0:
                    continue

                # MediaPipe expects RGB input; mark it read-only while it is
                # being processed.
                image.flags.writeable = False
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = holistic.process(image)

                # Draw landmark annotation on the image (back in BGR).
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                mp_drawing.draw_landmarks(
                    image,
                    results.face_landmarks,
                    mp_holistic.FACEMESH_CONTOURS,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles
                    .get_default_face_mesh_contours_style())
                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_holistic.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing_styles
                    .get_default_pose_landmarks_style())
                out.write(image)

                for kind, attr, missing_count in _LANDMARK_PARTS:
                    _append_landmarks(columns, getattr(results, attr), kind,
                                      missing_count, frame_number)
    finally:
        # Release the capture and writer even if processing fails midway.
        cap.release()
        out.release()
    cv2.destroyAllWindows()

    df1 = pd.DataFrame(columns)
    aa = load_relevant_data_subset(df1)

    model_path_1 = 'model_1.tflite'
    model_path_2 = 'model_2.tflite'
    model_path_3 = 'model_3.tflite'
    output_1 = mark_pred(model_path_1, aa)
    output_2 = mark_pred(model_path_2, aa)
    output_3 = mark_pred(model_path_3, aa)

    # Ensemble: sum the per-model softmax scores and take the best index.
    output = (softmax(output_1['outputs'])
              + softmax(output_2['outputs'])
              + softmax(output_3['outputs']))
    sign = output.argmax()
    lb = inverse_dict.get(sign)
    yield output_video, lb