Spaces:
Runtime error
Runtime error
File size: 5,145 Bytes
e43f2e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import cv2
import numpy as np
class Ocr:
    '''
    Character-level OCR pipeline: character detection followed by
    recognition of every detected character crop.

    Each stage is optional. A stage is only usable when its config was
    passed to the constructor; otherwise its model attribute is None.
    '''

    def __init__(self, root_path_model: str,
                 detection_config: dict = None,
                 recognition_config: dict = None) -> None:
        '''
        Store the configs and load the requested models
        @params:
            - root_path_model: str -> forwarded to each model as `root_path`
            - detection_config: dict -> forwarded to CharDetection as `model_config`;
                                        the detection model is not loaded when falsy
            - recognition_config: dict -> forwarded to CharRecognition as `model_config`;
                                          the recognition model is not loaded when falsy
        '''
        self.detection_config = detection_config
        self.recognition_config = recognition_config
        # Always define both model attributes so an unloaded stage is an
        # explicit None rather than a missing attribute.
        self.detection_model = None
        self.recog_model = None
        if detection_config:
            # Imported lazily: the module is only needed when this stage is used.
            from .char_detection import CharDetection
            self.detection_model = CharDetection(
                root_path=root_path_model, model_config=detection_config)
        if recognition_config:
            # Imported lazily: the module is only needed when this stage is used.
            from .char_recognition import CharRecognition
            self.recog_model = CharRecognition(
                root_path=root_path_model, model_config=recognition_config)

    def char_detection(self, image: np.ndarray, image_size: int = 244,
                       threshold: float = 0.5, boxes_ori: bool = True,
                       det_sorted: bool = True) -> dict:
        '''
        Detect characters in an image
        @params:
            - image: np.ndarray -> image to be detected
            - image_size: int -> size of image to be detected
            - threshold: float -> threshold for detection
            - boxes_ori: bool -> if True, return boxes in original image
            - det_sorted: bool -> if True, return boxes in sorted order
        @return:
            - result: whatever `detection_model.detect` returns. It must at
                      least provide 'boxes' (used by `ocr`); the original
                      documentation lists
                      {'boxes': np.array, 'confidences': np.array, 'labels': np.array}
        @raise:
            - AssertionError -> detection model is not loaded
        '''
        # Raised explicitly (instead of `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not self.detection_config:
            raise AssertionError('Model is not loaded')
        # Positional order (image, size, boxes_ori, threshold) is the
        # detector's calling convention and must be kept as is.
        return self.detection_model.detect(
            image, image_size, boxes_ori, threshold, sorted=det_sorted)

    def char_recognition(self, image: np.ndarray) -> dict:
        '''
        Read a single character from an image
        @params:
            - image: np.ndarray -> image to be read
        @return:
            - result: whatever `recog_model.recognition` returns; `ocr`
                      relies on the keys 'text' and 'conf'
        @raise:
            - AssertionError -> recognition model is not loaded
        '''
        # Explicit raise so the check survives `python -O`.
        if not self.recognition_config:
            raise AssertionError('Model is not loaded')
        return self.recog_model.recognition(image)

    def __calculate_confidence(self, result: list) -> float:
        '''
        Mean of the 'conf' values rounded to 2 decimals.
        Returns 0.0 for an empty list (no character detected) instead of
        dividing by zero.
        '''
        if not result:
            return 0.0
        return round(sum(item['conf'] for item in result) / len(result), 2)

    def __marger_text(self, result: list) -> str:
        '''
        Concatenate the 'text' of every recognition result, in list order.
        '''
        return ''.join(item['text'] for item in result)

    def visualize_result(self, image: np.ndarray, results: list) -> np.ndarray:
        '''
        Visualize result of OCR
        @params:
            - image: np.ndarray -> image to draw on (it is drawn on directly,
                                   no copy is made)
            - results: list -> result of OCR (output type 'advanced'); every
                               item needs the keys 'box' and 'text'
        @return:
            - image: np.ndarray -> the same image object, with boxes and text drawn
        '''
        for item in results:
            # Boxes produced by `ocr` hold NumPy integers; convert them to
            # plain ints before handing the points to OpenCV.
            x_min, y_min, x_max, y_max = (int(value) for value in item['box'])
            # Draw box
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
            # Draw text
            cv2.putText(image, item['text'], (x_min, y_min),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        return image

    def ocr(self, image: np.ndarray, det_size: int = 244, boxes_ori: bool = True,
            det_threshold: float = 0.5, det_sorted: bool = True,
            output_type: str = 'normal') -> 'dict | list':
        '''
        Read text from image using Text Detection and Recognition
        @params:
            - image: np.ndarray -> image to be read
            - det_size: int -> size of image to be detected
            - boxes_ori: bool -> if True, return boxes in original image
            - det_threshold: float -> threshold for detection
            - det_sorted: bool -> if True, return boxes in sorted order
            - output_type: str -> 'normal' or 'advanced'
        @return:
            - result: result of detection and recognition
                - normal   : {'confidence': float, 'text': str}
                             ({'confidence': 0.0, 'text': ''} when nothing is detected)
                - advanced : [{'text': str, 'conf': float, 'box': np.ndarray}]
                             where 'box' is the integer row (x_min, y_min, x_max, y_max)
        @raise:
            - AssertionError -> invalid output type, or a model is not loaded
        '''
        # Explicit raise so the validation survives `python -O`.
        if output_type not in ('normal', 'advanced'):
            raise AssertionError('Output type is not valid')

        # Char detection
        res_detection = self.char_detection(
            image=image, image_size=det_size, threshold=det_threshold,
            boxes_ori=boxes_ori, det_sorted=det_sorted)
        boxes = res_detection['boxes'].astype(int)

        # Char recognition
        # NOTE(review): crops are taken from `image` itself, so the boxes are
        # assumed to be in the coordinates of `image` -- confirm what the
        # detector returns when boxes_ori is False.
        result_recognition = []
        for box in boxes:
            x_min, y_min, x_max, y_max = box
            # A negative bound would wrap around in a NumPy slice and yield an
            # empty or wrong crop, so clamp to the image origin. Bounds past
            # the far edge are already truncated by slicing.
            image_crop = image[max(y_min, 0):max(y_max, 0),
                               max(x_min, 0):max(x_max, 0)]
            res_recognition = self.char_recognition(image_crop)
            if output_type == 'normal':
                result_recognition.append(res_recognition)
            else:
                result_recognition.append({
                    'text': res_recognition['text'],
                    'conf': res_recognition['conf'],
                    'box': box})

        # Output type
        if output_type == 'advanced':
            return result_recognition
        return {
            'confidence': self.__calculate_confidence(result_recognition),
            'text': self.__marger_text(result_recognition),
        }
if __name__ == '__main__':
    # Manual smoke test: run the full OCR pipeline on sample image(s), save an
    # annotated copy named after the recognised text, and print that text.
    import os
    import sys
    import glob

    # Add the parent of this file's directory to the import path so the
    # `configs` package can be imported from there.
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.dirname(SCRIPT_DIR))
    from configs.models import DIRECTORY_MODEL, MODELS

    root_model = DIRECTORY_MODEL
    config_det = MODELS['char_detection']
    config_recog = MODELS['char_recognition']

    ocr = Ocr(root_path_model=root_model,
              detection_config=config_det, recognition_config=config_recog)

    for image_path in glob.glob('/Users/alimustofa/Halotec/Datasets/JASAMARGA/REPORT/LPR/old_images/A122_1657688221.jpg'):
        image = cv2.imread(image_path)
        # cv2.imread returns None when the file cannot be read or decoded.
        if image is None:
            print('Could not read image: ' + image_path, file=sys.stderr)
            continue
        result = ocr.ocr(image, output_type='advanced', det_threshold=0.9)
        # Build the recognised text once; it is both the output file name
        # and the printed result.
        text_ocr = ''.join(item['text'] for item in result)
        cv2.imwrite(text_ocr + '.jpg', ocr.visualize_result(image, result))
        print(text_ocr)