| | import argparse |
| |
|
| | import numpy as np |
| | import cv2 as cv |
| |
|
| | |
| | opencv_python_version = lambda str_version: tuple(map(int, (str_version.split(".")))) |
| | assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"), \ |
| | "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python" |
| |
|
| | from mp_palmdet import MPPalmDet |
| |
|
| | |
| | backend_target_pairs = [ |
| | [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU], |
| | [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA], |
| | [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16], |
| | [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU], |
| | [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU] |
| | ] |
| |
|
| | parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe') |
| | parser.add_argument('--input', '-i', type=str, |
| | help='Usage: Set path to the input image. Omit for using default camera.') |
| | parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx', |
| | help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.') |
| | parser.add_argument('--backend_target', '-bt', type=int, default=0, |
| | help='''Choose one of the backend-target pair to run this demo: |
| | {:d}: (default) OpenCV implementation + CPU, |
| | {:d}: CUDA + GPU (CUDA), |
| | {:d}: CUDA + GPU (CUDA FP16), |
| | {:d}: TIM-VX + NPU, |
| | {:d}: CANN + NPU |
| | '''.format(*[x for x in range(len(backend_target_pairs))])) |
| | parser.add_argument('--score_threshold', type=float, default=0.8, |
| | help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.') |
| | parser.add_argument('--nms_threshold', type=float, default=0.3, |
| | help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.') |
| | parser.add_argument('--save', '-s', action='store_true', |
| | help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.') |
| | parser.add_argument('--vis', '-v', action='store_true', |
| | help='Usage: Specify to open a new window to show results. Invalid in case of camera input.') |
| | args = parser.parse_args() |
| |
|
| | def visualize(image, results, print_results=False, fps=None): |
| | output = image.copy() |
| |
|
| | if fps is not None: |
| | cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255)) |
| |
|
| | for idx, palm in enumerate(results): |
| | score = palm[-1] |
| | palm_box = palm[0:4] |
| | palm_landmarks = palm[4:-1].reshape(7, 2) |
| |
|
| | |
| | palm_box = palm_box.astype(np.int32) |
| | cv.putText(output, '{:.4f}'.format(score), (palm_box[0], palm_box[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 255, 0)) |
| |
|
| | |
| | cv.rectangle(output, (palm_box[0], palm_box[1]), (palm_box[2], palm_box[3]), (0, 255, 0), 2) |
| |
|
| | |
| | palm_landmarks = palm_landmarks.astype(np.int32) |
| | for p in palm_landmarks: |
| | cv.circle(output, p, 2, (0, 0, 255), 2) |
| |
|
| | |
| | if print_results: |
| | print('-----------palm {}-----------'.format(idx + 1)) |
| | print('score: {:.2f}'.format(score)) |
| | print('palm box: {}'.format(palm_box)) |
| | print('palm landmarks: ') |
| | for plm in palm_landmarks: |
| | print('\t{}'.format(plm)) |
| |
|
| | return output |
| |
|
| | if __name__ == '__main__': |
| | backend_id = backend_target_pairs[args.backend_target][0] |
| | target_id = backend_target_pairs[args.backend_target][1] |
| |
|
| | |
| | model = MPPalmDet(modelPath=args.model, |
| | nmsThreshold=args.nms_threshold, |
| | scoreThreshold=args.score_threshold, |
| | backendId=backend_id, |
| | targetId=target_id) |
| |
|
| | |
| | if args.input is not None: |
| | image = cv.imread(args.input) |
| |
|
| | |
| | results = model.infer(image) |
| | if len(results) == 0: |
| | print('Hand not detected') |
| |
|
| | |
| | image = visualize(image, results, print_results=True) |
| |
|
| | |
| | if args.save: |
| | print('Resutls saved to result.jpg\n') |
| | cv.imwrite('result.jpg', image) |
| |
|
| | |
| | if args.vis: |
| | cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE) |
| | cv.imshow(args.input, image) |
| | cv.waitKey(0) |
| | else: |
| | deviceId = 0 |
| | cap = cv.VideoCapture(deviceId) |
| |
|
| | tm = cv.TickMeter() |
| | while cv.waitKey(1) < 0: |
| | hasFrame, frame = cap.read() |
| | if not hasFrame: |
| | print('No frames grabbed!') |
| | break |
| |
|
| | |
| | tm.start() |
| | results = model.infer(frame) |
| | tm.stop() |
| |
|
| | |
| | frame = visualize(frame, results, fps=tm.getFPS()) |
| |
|
| | |
| | cv.imshow('MPPalmDet Demo', frame) |
| |
|
| | tm.reset() |
| |
|