# Source: Alimustoofaa, "first commit" (e43f2e6)
import cv2
import numpy as np
class Ocr:
    '''
    Two-stage character OCR: detect character boxes, then read each crop.

    Each stage is optional. A stage whose config is falsy is not loaded, and
    calling it raises AssertionError('Model is not loaded').
    '''

    def __init__(self, root_path_model:str,
                 detection_config:dict = None, recognition_config:dict = None) -> None:
        '''
        Store the configs and load the models that are configured
        @params:
            - root_path_model: str -> forwarded to each model as `root_path`
            - detection_config: dict -> forwarded to CharDetection as `model_config`;
                                        falsy value skips loading the detector
            - recognition_config: dict -> forwarded to CharRecognition as `model_config`;
                                          falsy value skips loading the recognizer
        '''
        self.detection_config = detection_config
        self.recognition_config = recognition_config
        # Always define the attributes so an unloaded stage is None, not missing
        self.detection_model = None
        self.recog_model = None
        # Each model module is imported only when its stage is configured
        if detection_config:
            from .char_detection import CharDetection
            self.detection_model = CharDetection(
                root_path=root_path_model, model_config=detection_config)
        if recognition_config:
            from .char_recognition import CharRecognition
            self.recog_model = CharRecognition(
                root_path=root_path_model, model_config=recognition_config)

    def char_detection(self, image:np.ndarray, image_size:int = 244,
                       threshold:float = 0.5, boxes_ori:bool = True, det_sorted:bool = True) -> dict:
        '''
        Detect character from image
        @params:
            - image: np.ndarray -> image to be detected
            - image_size: int -> size of image to be detected
            - threshold: float -> threshold for detection
            - boxes_ori: bool -> if True, return boxes in original image
            - det_sorted: bool -> if True, return boxes in sorted order
        @return:
            - result: output of the detection model's `detect`, expected to be
                      {'boxes': np.array, 'confidences': np.array, 'labels': np.array}
        @raises:
            - AssertionError -> detection model is not loaded
        '''
        # Raised explicitly (not via `assert`) so the check survives `python -O`
        if not self.detection_config:
            raise AssertionError('Model is not loaded')
        result_det = self.detection_model.detect(image, image_size,
                                                 boxes_ori, threshold, sorted=det_sorted)
        return result_det

    def char_recognition(self, image:np.ndarray) -> dict:
        '''
        Read single character from image
        @params:
            - image: np.ndarray -> image to be read
        @return:
            - result: output of the recognition model's `recognition`,
                      expected to be {'text': str, 'conf': float}
        @raises:
            - AssertionError -> recognition model is not loaded
        '''
        # Raised explicitly (not via `assert`) so the check survives `python -O`
        if not self.recognition_config:
            raise AssertionError('Model is not loaded')
        return self.recog_model.recognition(image)

    def __calculate_confidence(self, result:list) -> float:
        '''
        Mean of the 'conf' values, rounded to 2 decimals
        @params:
            - result: list -> recognition results, each holding a 'conf' key
        @return:
            - confidence: float -> 0.0 when `result` is empty
        '''
        # No detections: avoid dividing by zero
        if not result:
            return 0.0
        return round(sum(i['conf'] for i in result)/len(result), 2)

    def __marger_text(self, result:list) -> str:
        '''
        Concatenate the 'text' values in order
        @params:
            - result: list -> recognition results, each holding a 'text' key
        @return:
            - text: str -> '' when `result` is empty
        '''
        return ''.join(i['text'] for i in result)

    def visualize_result(self, image:np.ndarray, results:list) -> np.ndarray:
        '''
        Visualize result of OCR
        @params:
            - image: np.ndarray -> image to be draw (drawn on in place)
            - results: list -> result of OCR(output type advanced)
        @return:
            - image: np.ndarray -> the same image object, with boxes and text drawn
        '''
        for box in results:
            # Plain ints: box values may be numpy scalars coming from `ocr`
            x_min, y_min, x_max, y_max = (int(v) for v in box['box'])
            # Draw boxes
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
            # Draw text
            text = box['text']
            cv2.putText(image, text, (x_min, y_min), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        return image

    def ocr(self, image:np.ndarray, det_size:int = 244, boxes_ori:bool = True,
            det_threshold:float=0.5, det_sorted:bool=True, output_type:str='normal') -> 'dict | list':
        '''
        Read text from image using Text Detection and Recognition
        @params:
            - image: np.ndarray -> image to be read
            - det_size: int -> size of image to be detected
            - boxes_ori: bool -> if True, return boxes in original image
            - det_threshold: float -> threshold for detection
            - det_sorted: bool -> if True, return boxes in sorted order
            - output_type: str -> 'normal' or 'advanced'
        @return:
            - result: result of detection and recognition
                - normal : {'confidence': float, 'text': str}
                           ({'confidence': 0.0, 'text': ''} when nothing is detected)
                - advanced : [{'text': str, 'conf': float, 'box': np.ndarray of 4 ints}]
                             ([] when nothing is detected)
        @raises:
            - AssertionError -> invalid output_type, or a model is not loaded
        '''
        # Raised explicitly (not via `assert`) so the check survives `python -O`
        if output_type not in ('normal', 'advanced'):
            raise AssertionError('Output type is not valid')

        # Char detection
        res_detection = self.char_detection(image=image, image_size=det_size,
                                            threshold=det_threshold, boxes_ori=boxes_ori,
                                            det_sorted=det_sorted)
        boxes = res_detection['boxes'].astype(int)

        # Char recognition
        result_recognition = []
        for box in boxes:
            x_min, y_min, x_max, y_max = box
            # Clamp at 0: a negative index would wrap around to the far edge
            # of the image instead of stopping at the border
            image_crop = image[max(y_min, 0):max(y_max, 0),
                               max(x_min, 0):max(x_max, 0)]
            res_recognition = self.char_recognition(image_crop)
            if output_type == 'normal':
                result_recognition.append(res_recognition)
            else:
                # The reported box is the detector's box, not the clamped crop
                result_recognition.append({
                    'text': res_recognition['text'],
                    'conf': res_recognition['conf'],
                    'box': box})

        # Output type
        if output_type == 'advanced':
            return result_recognition
        confidence = self.__calculate_confidence(result_recognition)
        text = self.__marger_text(result_recognition)
        return {'confidence': confidence, 'text': text}
if __name__ == '__main__':
    # Manual smoke test: run detection + recognition on one or more images,
    # save an annotated copy named after the recognised text, and print the text.
    # Usage: pass glob patterns as arguments; with none, the default image is used.
    import glob
    import os
    import sys

    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    # Put the parent directory on sys.path so the `configs` package can be imported
    sys.path.append(os.path.dirname(SCRIPT_DIR))
    from configs.models import DIRECTORY_MODEL, MODELS

    root_model = DIRECTORY_MODEL
    config_det = MODELS['char_detection']
    config_recog = MODELS['char_recognition']
    ocr = Ocr(root_path_model=root_model,
              detection_config=config_det, recognition_config=config_recog)

    # Command-line patterns override the original hard-coded sample image
    patterns = sys.argv[1:] or [
        '/Users/alimustofa/Halotec/Datasets/JASAMARGA/REPORT/LPR/old_images/A122_1657688221.jpg']
    for pattern in patterns:
        for image_path in glob.glob(pattern):
            image = cv2.imread(image_path)
            # cv2.imread returns None instead of raising when the file cannot be decoded
            if image is None:
                print('Could not read image:', image_path, file=sys.stderr)
                continue
            result = ocr.ocr(image, output_type='advanced', det_threshold=0.9)
            text_ocr = ''.join(item['text'] for item in result)
            # The annotated image is written to the current working directory
            cv2.imwrite(text_ocr+'.jpg', ocr.visualize_result(image, result))
            print(text_ocr)