Limit combinations of backends and targets in demos and benchmark (#145)

Browse files

* limit backend and target combination in demos and benchmark

* simpler version checking

Files changed (2) hide show

crnn.py +3 -5
demo.py +35 -31

crnn.py CHANGED Viewed

@@ -43,12 +43,10 @@ class CRNN:
     def _load_charset(self, charset):
         return ''.join(charset.splitlines())
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
         self._model.setPreferableBackend(self._backendId)
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
     def _preprocess(self, image, rbbox):

     def _load_charset(self, charset):
         return ''.join(charset.splitlines())
+    def setBackendAndTarget(self, backendId, targetId):
+        """Record the backend/target ids and apply both to the underlying model.
+
+        The ids are stored on the instance and then forwarded to
+        ``self._model.setPreferableBackend`` and ``self._model.setPreferableTarget``.
+        Setting them together replaces the former separate ``setBackend`` /
+        ``setTarget`` methods.
+        """
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
         self._model.setPreferableTarget(self._targetId)
     def _preprocess(self, image, rbbox):

demo.py CHANGED Viewed

@@ -15,38 +15,41 @@ from crnn import CRNN
 sys.path.append('../text_detection_db')
 from db import DB
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 parser = argparse.ArgumentParser(
     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 parser.add_argument('--width', type=int, default=736,
                     help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736,
                     help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
 args = parser.parse_args()
 def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
@@ -59,8 +62,9 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2
     return output
 if __name__ == '__main__':
-    # Instantiate CRNN for text recognition
-    recognizer = CRNN(modelPath=args.model)
     # Instantiate DB for text detection
     detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
                   inputSize=[args.width, args.height],
@@ -68,9 +72,10 @@ if __name__ == '__main__':
                   polygonThreshold=0.5,
                   maxCandidates=200,
                   unclipRatio=2.0,
-                  backendId=args.backend,
-                  targetId=args.target
-    )
     # If input is an image
     if args.input is not None:
@@ -161,4 +166,3 @@ if __name__ == '__main__':
             # Visualize results in a new Window
             cv.imshow('{} Demo'.format(recognizer.name), original_image)

 sys.path.append('../text_detection_db')
 from db import DB
+# Check OpenCV version.
+# Compare the numeric (major, minor) components instead of the raw strings: string
+# comparison is lexicographic, so "4.10.0" >= "4.7.0" is False and newer releases
+# would be rejected. Only the first two components are parsed so that suffixes in
+# the patch field (e.g. "4.7.0-dev") do not break the integer conversion.
+assert tuple(int(part) for part in cv.__version__.split('.')[:2]) >= (4, 7), \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX,  cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN,   cv.dnn.DNN_TARGET_NPU]
+]
 parser = argparse.ArgumentParser(
     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx',
+                    help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
+# Restrict the index to the known pairs: argparse then rejects an out-of-range value
+# with a usage error instead of an IndexError later, and a negative value can no
+# longer silently wrap around to a pair counted from the end of the list.
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    choices=range(len(backend_target_pairs)),
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*range(len(backend_target_pairs))))
 parser.add_argument('--width', type=int, default=736,
                     help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736,
                     help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
     return output
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate DB for text detection
     detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
                   inputSize=[args.width, args.height],
                   polygonThreshold=0.5,
                   maxCandidates=200,
                   unclipRatio=2.0,
+                  backendId=backend_id,
+                  targetId=target_id)
+    # Instantiate CRNN for text recognition
+    recognizer = CRNN(modelPath=args.model, backendId=backend_id, targetId=target_id)
     # If input is an image
     if args.input is not None:
             # Visualize results in a new Window
             cv.imshow('{} Demo'.format(recognizer.name), original_image)