Spaces:
Sleeping
Sleeping
| """ | |
| Author: Khanh Phan | |
| Date: 2023-11-01 | |
| """ | |
| import numpy as np | |
| from paddleocr import PaddleOCR | |
| from src.settings import RECOGNITION_THRESHOLD | |
| from src.utilities import crop_image | |
def postprocess_result(
    image: np.ndarray,
    result: list,
    OCR: "PaddleOCR",
) -> list[list]:
    """
    Re-run recognition on low-confidence lines of an OCR result.

    Every line whose recognition score is below ``RECOGNITION_THRESHOLD``
    is passed to ``recognize_text_by_multilanguage``; the returned
    ``(text, score)`` pair replaces the line's original pair. Lines at or
    above the threshold are kept unchanged.

    args:
        image(array): image the OCR result was produced from
        result(list): single-element list wrapping the detected lines;
            each line is ``[box, (text, score)]``
        OCR(PaddleOCR): engine used to re-recognise low-score lines
    return(list):
        ``[lines]`` in the same layout as ``result``. The line objects are
        the ones from ``result`` and are updated in place. ``[[]]`` is
        returned when ``result`` holds no lines.
    """
    # An OCR pass that finds no text may hand back an empty list or [None]
    # (behaviour differs between PaddleOCR releases -- TODO confirm for the
    # pinned version); treat both as "no lines" instead of crashing.
    if not result or result[0] is None:
        return [[]]

    new_result = []
    for line in result[0]:
        # line[0] is the box, line[1][0] the text, line[1][1] the score.
        if line[1][1] < RECOGNITION_THRESHOLD:
            line[1] = recognize_text_by_multilanguage(image, line, OCR)
        new_result.append(line)
    return [new_result]
def recognize_text_by_multilanguage(
    image: np.ndarray,
    line: list,
    OCR: "PaddleOCR",
) -> tuple[str, float]:
    """
    Do recognition again on a text line having a low recognition score.

    The region described by the line's box is cropped out of ``image``
    with ``crop_image`` and passed to ``OCR.ocr`` in recognition-only mode
    (``det=False``). The new reading is used only if its score is strictly
    higher than the original one.

    args:
        image(array): image containing the text line
        line(list): ``[box, (text, score)]`` for a single detected line
        OCR(PaddleOCR): engine used for the second recognition pass
    return(tuple):
        ``(text, score)`` -- the new reading if it scored higher,
        otherwise the original pair from ``line``
    """
    box = line[0]
    txt = line[1][0]
    score = line[1][1]

    cropped_image = crop_image(image, box)
    result = OCR.ocr(cropped_image, cls=True, det=False, rec=True)

    # Keep the original reading when the second pass yields nothing
    # (None / empty result) rather than failing on the index lookup.
    if not result or not result[0]:
        return (txt, score)

    new_txt = result[0][0][0]
    new_score = result[0][0][1]
    if new_score > score:
        return (new_txt, new_score)
    return (txt, score)