Limit combinations of backends and targets in demos and benchmark (#145)
Browse files
* limit backend and target combination in demos and benchmark
* simpler version checking
crnn.py
CHANGED
|
@@ -43,12 +43,10 @@ class CRNN:
|
|
| 43 |
def _load_charset(self, charset):
|
| 44 |
return ''.join(charset.splitlines())
|
| 45 |
|
| 46 |
-
def setBackend(self, backend_id):
|
| 47 |
-
self._backendId = backend_id
|
|
|
|
| 48 |
self._model.setPreferableBackend(self._backendId)
|
| 49 |
-
|
| 50 |
-
def setTarget(self, target_id):
|
| 51 |
-
self._targetId = target_id
|
| 52 |
self._model.setPreferableTarget(self._targetId)
|
| 53 |
|
| 54 |
def _preprocess(self, image, rbbox):
|
|
|
|
| 43 |
def _load_charset(self, charset):
|
| 44 |
return ''.join(charset.splitlines())
|
| 45 |
|
| 46 |
+
def setBackendAndTarget(self, backendId, targetId):
|
| 47 |
+
self._backendId = backendId
|
| 48 |
+
self._targetId = targetId
|
| 49 |
self._model.setPreferableBackend(self._backendId)
|
|
|
|
|
|
|
|
|
|
| 50 |
self._model.setPreferableTarget(self._targetId)
|
| 51 |
|
| 52 |
def _preprocess(self, image, rbbox):
|
demo.py
CHANGED
|
@@ -15,38 +15,41 @@ from crnn import CRNN
|
|
| 15 |
sys.path.append('../text_detection_db')
|
| 16 |
from db import DB
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
try:
|
| 31 |
-
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 32 |
-
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 33 |
-
help_msg_backends += "; {:d}: TIMVX"
|
| 34 |
-
help_msg_targets += "; {:d}: NPU"
|
| 35 |
-
except:
|
| 36 |
-
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
|
| 37 |
|
| 38 |
parser = argparse.ArgumentParser(
|
| 39 |
description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
|
| 40 |
-
parser.add_argument('--input', '-i', type=str,
|
| 41 |
-
|
| 42 |
-
parser.add_argument('--
|
| 43 |
-
|
| 44 |
-
parser.add_argument('--
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
parser.add_argument('--width', type=int, default=736,
|
| 47 |
help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
|
| 48 |
parser.add_argument('--height', type=int, default=736,
|
| 49 |
help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
args = parser.parse_args()
|
| 51 |
|
| 52 |
def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
|
|
@@ -59,8 +62,9 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2
|
|
| 59 |
return output
|
| 60 |
|
| 61 |
if __name__ == '__main__':
|
| 62 |
-
|
| 63 |
-
|
|
|
|
| 64 |
# Instantiate DB for text detection
|
| 65 |
detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
|
| 66 |
inputSize=[args.width, args.height],
|
|
@@ -68,9 +72,10 @@ if __name__ == '__main__':
|
|
| 68 |
polygonThreshold=0.5,
|
| 69 |
maxCandidates=200,
|
| 70 |
unclipRatio=2.0,
|
| 71 |
-
backendId=
|
| 72 |
-
targetId=
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
# If input is an image
|
| 76 |
if args.input is not None:
|
|
@@ -161,4 +166,3 @@ if __name__ == '__main__':
|
|
| 161 |
|
| 162 |
# Visualize results in a new Window
|
| 163 |
cv.imshow('{} Demo'.format(recognizer.name), original_image)
|
| 164 |
-
|
|
|
|
| 15 |
sys.path.append('../text_detection_db')
|
| 16 |
from db import DB
|
| 17 |
|
| 18 |
+
# Check OpenCV version
|
| 19 |
+
assert cv.__version__ >= "4.7.0", \
|
| 20 |
+
"Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
|
| 21 |
+
|
| 22 |
+
# Valid combinations of backends and targets
|
| 23 |
+
backend_target_pairs = [
|
| 24 |
+
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
| 25 |
+
[cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
|
| 26 |
+
[cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
|
| 27 |
+
[cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
|
| 28 |
+
[cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
|
| 29 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
parser = argparse.ArgumentParser(
|
| 32 |
description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
|
| 33 |
+
parser.add_argument('--input', '-i', type=str,
|
| 34 |
+
help='Usage: Set path to the input image. Omit for using default camera.')
|
| 35 |
+
parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx',
|
| 36 |
+
help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
|
| 37 |
+
parser.add_argument('--backend_target', '-bt', type=int, default=0,
|
| 38 |
+
help='''Choose one of the backend-target pair to run this demo:
|
| 39 |
+
{:d}: (default) OpenCV implementation + CPU,
|
| 40 |
+
{:d}: CUDA + GPU (CUDA),
|
| 41 |
+
{:d}: CUDA + GPU (CUDA FP16),
|
| 42 |
+
{:d}: TIM-VX + NPU,
|
| 43 |
+
{:d}: CANN + NPU
|
| 44 |
+
'''.format(*[x for x in range(len(backend_target_pairs))]))
|
| 45 |
parser.add_argument('--width', type=int, default=736,
|
| 46 |
help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
|
| 47 |
parser.add_argument('--height', type=int, default=736,
|
| 48 |
help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
|
| 49 |
+
parser.add_argument('--save', '-s', action='store_true',
|
| 50 |
+
help='Usage: Specify to save a file with results. Invalid in case of camera input.')
|
| 51 |
+
parser.add_argument('--vis', '-v', action='store_true',
|
| 52 |
+
help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
|
| 53 |
args = parser.parse_args()
|
| 54 |
|
| 55 |
def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
|
|
|
|
| 62 |
return output
|
| 63 |
|
| 64 |
if __name__ == '__main__':
|
| 65 |
+
backend_id = backend_target_pairs[args.backend_target][0]
|
| 66 |
+
target_id = backend_target_pairs[args.backend_target][1]
|
| 67 |
+
|
| 68 |
# Instantiate DB for text detection
|
| 69 |
detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
|
| 70 |
inputSize=[args.width, args.height],
|
|
|
|
| 72 |
polygonThreshold=0.5,
|
| 73 |
maxCandidates=200,
|
| 74 |
unclipRatio=2.0,
|
| 75 |
+
backendId=backend_id,
|
| 76 |
+
targetId=target_id)
|
| 77 |
+
# Instantiate CRNN for text recognition
|
| 78 |
+
recognizer = CRNN(modelPath=args.model, backendId=backend_id, targetId=target_id)
|
| 79 |
|
| 80 |
# If input is an image
|
| 81 |
if args.input is not None:
|
|
|
|
| 166 |
|
| 167 |
# Visualize results in a new Window
|
| 168 |
cv.imshow('{} Demo'.format(recognizer.name), original_image)
|
|
|