Spaces:

pmkhanh7890
/

kleverocr

Sleeping

kleverocr / src /postprocessing.py

export markdown

c46dcdd over 2 years ago

1.72 kB

	"""
	Author: Khanh Phan
	Date: 2023-11-01
	"""

	import numpy as np
	from paddleocr import PaddleOCR

	from src.settings import RECOGNITION_THRESHOLD
	from src.utilities import crop_image


	def postprocess_result(
	    image: np.ndarray,
	    result: list[list],
	    OCR: "PaddleOCR",
	) -> list[list]:
	    """
	    Post-processing steps to improve the results.

	    Every line of the first page whose recognition score is below
	    RECOGNITION_THRESHOLD is recognized again through
	    recognize_text_by_multilanguage, and its (text, score) entry is
	    replaced by whatever that call returns.
	    args:
	        image(array): RGB image
	        result(list): one list of lines per page; each line is
	            [box, (text, score)]. Only result[0] is processed.
	        OCR(PaddleOCR): engine handed to the second recognition pass
	    return(list):
	        [lines] - the processed lines wrapped in a one-element list.
	        An empty or missing first page (e.g. [] or [None]) yields [[]]
	        instead of raising.
	    note:
	        re-recognized lines are updated in place, so the caller's
	        `result` sees the new (text, score) as well.
	    """
	    # Nothing was detected: there are no lines to iterate over.
	    if not result or not result[0]:
	        return [[]]

	    new_result = []
	    for line in result[0]:
	        # line layout: box = line[0], text = line[1][0], score = line[1][1]
	        if line[1][1] < RECOGNITION_THRESHOLD:
	            line[1] = recognize_text_by_multilanguage(image, line, OCR)
	        new_result.append(line)

	    return [new_result]


	def recognize_text_by_multilanguage(
	    image: np.ndarray,
	    line: list,
	    OCR: "PaddleOCR",
	) -> tuple[str, float]:
	    """
	    Do recognition again on a text line having a low recognition score.

	    The line's box is cropped out of the image and passed through the
	    recognizer only (detection disabled, angle classifier enabled).
	    args:
	        image(array): RGB image
	        line(list): [box, (text, score)] of a single detected line
	        OCR(PaddleOCR): engine used for the second recognition pass
	    return(tuple):
	        (text, score) of the new recognition when its score is strictly
	        higher than the original one; otherwise the original
	        (text, score). The original pair is also returned when the
	        second pass produces no result.
	    """
	    box = line[0]
	    txt = line[1][0]
	    score = line[1][1]

	    cropped_image = crop_image(image, box)
	    result = OCR.ocr(cropped_image, cls=True, det=False, rec=True)

	    # The recognizer may come back empty for a crop; keep what we had.
	    if not result or not result[0]:
	        return (txt, score)

	    new_txt = result[0][0][0]
	    new_score = result[0][0][1]
	    if new_score > score:
	        return (new_txt, new_score)

	    return (txt, score)