Spaces:

jayllfpt
/

bib-number

Sleeping

App Files Files Community

bib-number / BIB_Extraction.py

jayllfpt

upload files

aa1a0f3 over 1 year ago

raw

history blame contribute delete

2.44 kB

	from akaocr import TextEngine, BoxEngine
	import cv2
	import numpy as np
	from PIL import Image
	import re


	def transform_image(image, box):
	    """Crop the quadrilateral ``box`` out of ``image`` and rectify it.

	    The four corners are mapped onto an axis-aligned rectangle whose width
	    and height are the longer of each pair of opposing edge lengths
	    (truncated to ``int``). If the rectified crop is clearly taller than it
	    is wide (height / width >= 1.25) it is turned with
	    ``np.rot90(..., k=3)`` before being returned.

	    Args:
	        image: Source image, passed unchanged to ``cv2.warpPerspective``.
	        box: Four corner points, as an ndarray or any nested sequence that
	            ``np.asarray`` accepts. The destination corners are listed as
	            top-left, top-right, bottom-right, bottom-left, so the points
	            are presumably expected in that order -- TODO confirm against
	            the box detector.

	    Returns:
	        The warped (and possibly rotated) crop.

	    Raises:
	        AssertionError: If ``box`` does not contain exactly four points.
	    """
	    # Convert before doing arithmetic: plain lists/tuples do not support
	    # the element-wise subtraction used for the edge lengths. No dtype is
	    # forced here so ndarray inputs are measured exactly as before.
	    pts = np.asarray(box)
	    if len(pts) != 4:
	        # Raised explicitly (same type and message as the former ``assert``)
	        # so the check is not stripped when Python runs with ``-O``.
	        raise AssertionError("Shape of points must be 4x2")

	    # Target size: the longer of each pair of opposing edges.
	    img_crop_width = int(max(
	        np.linalg.norm(pts[0] - pts[1]),
	        np.linalg.norm(pts[2] - pts[3]),
	    ))
	    img_crop_height = int(max(
	        np.linalg.norm(pts[0] - pts[3]),
	        np.linalg.norm(pts[1] - pts[2]),
	    ))

	    # Destination corners of the rectified crop.
	    pts_std = np.float32([
	        [0, 0],
	        [img_crop_width, 0],
	        [img_crop_width, img_crop_height],
	        [0, img_crop_height],
	    ])
	    src_pts = np.array(pts, dtype="float32")
	    M = cv2.getPerspectiveTransform(src_pts, pts_std)
	    dst_img = cv2.warpPerspective(
	        image,
	        M,
	        (img_crop_width, img_crop_height),
	        borderMode=cv2.BORDER_REPLICATE,
	        flags=cv2.INTER_CUBIC,
	    )

	    # Tall crops are rotated by a quarter turn (k=3) before being returned.
	    img_height, img_width = dst_img.shape[0:2]
	    if img_height / img_width >= 1.25:
	        dst_img = np.rot90(dst_img, k=3)

	    return dst_img


	def two_pts(bounding_box):
	    """Collapse a polygon into its axis-aligned bounding rectangle.

	    Only the first two components of every point are used. Returns
	    ``((min_x, min_y), (max_x, max_y))`` with each value passed through
	    ``round``.
	    """
	    xs = [point[0] for point in bounding_box]
	    ys = [point[1] for point in bounding_box]
	    top_left = (round(min(xs)), round(min(ys)))
	    bottom_right = (round(max(xs)), round(max(ys)))
	    return (top_left, bottom_right)


	class BIB_Extract:
	    """Find text regions in an image and keep those that are bib numbers.

	    Detection is delegated to ``BoxEngine`` and recognition to
	    ``TextEngine`` (both imported from ``akaocr``); this class crops the
	    detected regions and filters the recognized strings by digit count.
	    """

	    def __init__(self):
	        # Initialize the OCR engines
	        self.box_engine = BoxEngine()
	        self.text_engine = TextEngine()

	    def __call__(self, image, bib_length):
	        """Return the bib numbers of length ``bib_length`` found in ``image``.

	        Args:
	            image: Image handed to the box engine and to ``transform_image``.
	            bib_length: Required number of digits (see ``BIB_filter``).

	        Returns:
	            List of recognized strings consisting of exactly ``bib_length``
	            digits; empty when no boxes are detected.
	        """
	        boxes = self.box_engine(image)
	        # Crop and rectify every detected region; the boxes are walked in
	        # reverse of the order the detector returned them.
	        images = [transform_image(image, box) for box in boxes[::-1]]
	        if not images:
	            # Nothing detected: skip recognition rather than hand the text
	            # engine an empty batch.
	            return []

	        # Get the texts from the boxes
	        texts = self.text_engine(images)
	        return self.BIB_filter(texts, bib_length)

	    def BIB_filter(self, texts, bib_length):
	        """Keep the recognized strings made of exactly ``bib_length`` digits.

	        Args:
	            texts: Iterable of recognition results; element ``[0]`` of each
	                item is the text that is tested and returned (presumably
	                ``(text, confidence)`` pairs -- TODO confirm against the
	                text engine).
	            bib_length: Digit count, interpolated into a ``\\d{...}``
	                quantifier.

	        Returns:
	            List of the matching strings, in input order.
	        """
	        # ``fullmatch`` instead of ``match`` with ``^...$``: ``$`` also
	        # matches just before a trailing newline, which let "1234\n" through.
	        bib_pattern = re.compile(rf'\d{{{bib_length}}}')
	        return [s[0] for s in texts if bib_pattern.fullmatch(s[0])]


	if __name__ == '__main__':
	    # Demo: print the 4-digit bib numbers found in a sample image.
	    image_path = "1.jpg"
	    image = cv2.imread(image_path)
	    # cv2.imread reports an unreadable or missing file by returning None
	    # rather than raising, so check before running the OCR pipeline.
	    if image is None:
	        raise SystemExit(f"Could not read image: {image_path}")
	    engine = BIB_Extract()
	    print(engine(image, bib_length=4))