# CV_parsing/src/utils/utils_segment.py
# Source: ABAO77's repository, commit 930ec8d ("feat: good code")
from PIL import Image
import numpy as np
import cv2
from typing import Tuple
from pytesseract import pytesseract
# Section labels the segmentation model predicts, in class-index order.
class_names = [
    "Community",
    "Contact",
    "Education",
    "Experience",
    "Interests",
    "Profile",
    "Skills",
]
# Columns per detection row before the mask coefficients start:
# 4 box values (xc, yc, w, h) + one score per class.
number_class_custom = int(len(class_names) + 4)
# Letterbox state shared between preprocess() and postprocess();
# populated by preprocess() before any detection is decoded.
img_width, img_height = None, None
left = None
top = None
ratio = None
def preprocess(img: np.ndarray, shape=(640, 640)) -> np.ndarray:
    """Letterbox *img* to *shape* and convert to NCHW float32 in [0, 1].

    Args:
        img: BGR image (H, W, 3) as read by cv2.
        shape: target (width, height) for the model input.

    Returns:
        float32 array of shape (1, 3, H, W), RGB, scaled to [0, 1].

    Side effects: stores the letterbox mapping (img_width, img_height,
    left, top, ratio) in module globals for postprocess() to undo later.
    """
    global img_width, img_height, left, top, ratio
    img, ratio, (left, top) = resize_and_pad(img, new_shape=shape)
    img_height, img_width, _ = img.shape
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # HWC -> CHW, then add a batch dimension. Use the actual padded size
    # rather than the original hard-coded 640 so non-default shapes work.
    img = img.transpose(2, 0, 1)
    img = img.reshape(1, 3, img_height, img_width).astype("float32")
    img = img / 255.0
    return img
def extract_box(outputs):
    """Combine the detection head with the mask prototypes.

    Args:
        outputs: raw model outputs; outputs[0] is the detection head,
            outputs[1] the (32, 160, 160) mask prototype tensor.

    Returns:
        Array of rows [xc, yc, w, h, class scores..., flat 160*160 mask].
    """
    detections = outputs[0][0].transpose()  # one row per candidate box
    # Flatten the 32 prototype masks once (the original reshaped twice,
    # a harmless but redundant duplicate statement).
    protos = outputs[1][0].reshape(32, 160 * 160)
    boxes = detections[:, 0:number_class_custom]
    mask_coeffs = detections[:, number_class_custom:]
    # Each detection's mask is a linear combination of the prototypes.
    masks = mask_coeffs @ protos
    return np.hstack([boxes, masks])
def intersection(box1, box2):
    """Return the overlap area of two (x1, y1, x2, y2) boxes; 0 if disjoint."""
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    # Clamp each side to zero: for disjoint boxes both deltas go negative
    # and the unclamped product would be spuriously positive, making NMS
    # see phantom overlap.
    return max(0, x2 - x1) * max(0, y2 - y1)
def union(box1, box2):
    """Return the union area of two (x1, y1, x2, y2) boxes."""
    a_x1, a_y1, a_x2, a_y2 = box1[:4]
    b_x1, b_y1, b_x2, b_y2 = box2[:4]
    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
    area_b = (b_x2 - b_x1) * (b_y2 - b_y1)
    # Inclusion-exclusion: total area minus the double-counted overlap.
    return area_a + area_b - intersection(box1, box2)
def iou(box1, box2):
    """Return intersection-over-union of two boxes; 0.0 when the union is empty."""
    u = union(box1, box2)
    # Degenerate (zero-area) boxes would otherwise raise ZeroDivisionError.
    return intersection(box1, box2) / u if u else 0.0
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^-z)."""
    return np.reciprocal(1.0 + np.exp(-z))
def get_mask(row, box, img_width, img_height, threshold):
    """Return the box-sized binary mask (uint8, values 0/255) for one detection.

    Args:
        row: flattened 160*160 mask logits for this detection.
        box: (x1, y1, x2, y2) in image coordinates.
        img_width, img_height: size of the image the box refers to.
        threshold: sigmoid probability cutoff for a pixel to be "on".
    """
    x1, y1, x2, y2 = box
    proto = row.reshape(160, 160)
    proto = 1.0 / (1.0 + np.exp(-proto))  # sigmoid, inlined
    binary = np.where(proto > threshold, 255, 0).astype("uint8")
    # Map the box from image coordinates into the 160x160 prototype grid.
    gx1 = round(x1 / img_width * 160)
    gy1 = round(y1 / img_height * 160)
    gx2 = round(x2 / img_width * 160)
    gy2 = round(y2 / img_height * 160)
    crop = binary[gy1:gy2, gx1:gx2]
    # Upscale the cropped grid cells back to the box size in image pixels.
    resized = Image.fromarray(crop, "L").resize((round(x2 - x1), round(y2 - y1)))
    return np.array(resized)
def get_polygon(mask):
    """Return the first contour of *mask* as a list of [x, y] points."""
    contours, _hierarchy = cv2.findContours(
        mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    polygon = []
    # Each contour point has shape (1, 2): [[x, y]].
    for point in contours[0]:
        polygon.append([point[0][0], point[0][1]])
    return polygon
def postprocess(outputs, threshold_confidence, threshold_iou):
    """Decode raw model outputs into labelled boxes in original-image coordinates.

    Args:
        outputs: raw model outputs, as consumed by extract_box().
        threshold_confidence: minimum class score required to keep a detection.
        threshold_iou: IoU above which a lower-scoring overlapping box is dropped.

    Returns:
        List of {"box": [x1, y1, x2, y2] (ints), "label": str, "prob": float}.

    Relies on the module globals set by preprocess() (img_width, img_height,
    ratio, left, top) to undo the letterbox transform.
    """
    candidates = []
    for row in extract_box(outputs):
        xc, yc, w, h = row[:4]
        # Scale model (640x640) center/size coords to padded-image corners.
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        class_scores = row[4:number_class_custom]
        prob = class_scores.max()
        if prob < threshold_confidence:
            continue
        label = class_names[class_scores.argmax()]
        candidates.append([x1, y1, x2, y2, label, prob])
    # Greedy non-maximum suppression: take the best remaining box, drop
    # everything that overlaps it beyond threshold_iou, repeat.
    candidates.sort(key=lambda obj: obj[5], reverse=True)
    kept = []
    while candidates:
        best = candidates.pop(0)
        kept.append(best)
        candidates = [
            other for other in candidates if iou(other, best) < threshold_iou
        ]
    return [
        {
            "box": list(map(int, unpad_and_resize_boxes(obj[:4], ratio, left, top))),
            "label": obj[4],
            # BUG FIX: int(obj[5]) truncated every probability in [0, 1) to 0.
            "prob": float(obj[5]),
        }
        for obj in kept
    ]
def extract_text_dict(outputs):
    """Merge OCR results into {lowercased label: text}, joining duplicates with a space."""
    merged = {}
    for item in outputs:
        key = item.get("label").lower()
        value = item.get("text")
        if key not in merged:
            merged[key] = value
        else:
            merged[key] = merged[key] + " " + value
    return merged
def extract_text(outputs, image_origin):
    """Run OCR on each detected region and return the merged {label: text} dict.

    Args:
        outputs: list of {"box": [...], "label": ...} dicts from postprocess().
        image_origin: the original (unpadded) image to crop regions from.

    Side effects: mutates each entry of *outputs* by adding a "text" key.
    """
    for output in outputs:
        region = crop_image(image_origin, output.get("box"))
        text = pytesseract.image_to_string(region)
        # BUG FIX: the original unconditionally set "text" and then
        # appended the same string again, duplicating every OCR result.
        if "text" in output:
            output["text"] += text
        else:
            output["text"] = text
    return extract_text_dict(outputs)
def crop_image(image, box):
    """Return the sub-array of *image* bounded by box = (x1, y1, x2, y2)."""
    x1, y1, x2, y2 = (int(coord) for coord in box)
    return image[y1:y2, x1:x2]
def resize_and_pad(
    image: np.ndarray,
    new_shape: Tuple[int, int],
    padding_color: Tuple[int, int, int] = (144, 144, 144),
) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """Resize *image* to fit *new_shape* preserving aspect ratio, then letterbox-pad.

    Args:
        image: BGR image (H, W, C).
        new_shape: target (width, height).
        padding_color: BGR fill for the letterbox border.

    Returns:
        (padded_image, scale_ratio, (pad_left, pad_top)) — everything
        unpad_and_resize_boxes() needs to map boxes back to the original.
    """
    h_org, w_org = image.shape[:2]
    w_new, h_new = new_shape
    padd_left, padd_right, padd_top, padd_bottom = 0, 0, 0, 0
    if h_org >= w_org:
        # Portrait (or square): fit the height, pad left/right.
        img_resize = cv2.resize(image, (int(w_org * h_new / h_org), h_new))
        h, w = img_resize.shape[:2]
        padd_left = (w_new - w) // 2
        padd_right = w_new - w - padd_left
        ratio = h_new / h_org
    else:
        # Landscape: fit the width, pad top/bottom. (The original used a
        # second independent `if` with the complementary condition; `else`
        # makes the mutual exclusion explicit and keeps ratio/img_resize
        # provably bound.)
        img_resize = cv2.resize(image, (w_new, int(h_org * w_new / w_org)))
        h, w = img_resize.shape[:2]
        padd_top = (h_new - h) // 2
        padd_bottom = h_new - h - padd_top
        ratio = w_new / w_org
    image = cv2.copyMakeBorder(
        img_resize,
        padd_top,
        padd_bottom,
        padd_left,
        padd_right,
        cv2.BORDER_CONSTANT,
        None,
        value=padding_color,
    )
    return image, ratio, (padd_left, padd_top)
def unpad_and_resize_boxes(boxes, ratio, left, top):
    """Map boxes from letterboxed coordinates back to the original image.

    Args:
        boxes: one box [x1, y1, x2, y2] or a list of such boxes.
        ratio: scale factor returned by resize_and_pad().
        left, top: padding offsets returned by resize_and_pad().

    Returns:
        [] for empty input; a flat [x1, y1, x2, y2] list for a single box;
        otherwise a list of per-box lists.
    """
    if len(boxes) == 0:
        return boxes
    # BUG FIX: force float dtype. With integer input, np.array() creates an
    # int array and the in-place `/= ratio` below raises a casting TypeError.
    boxes = np.asarray(boxes, dtype=float)
    if boxes.ndim == 1:
        boxes = boxes.reshape(-1, 4)
    # Undo the padding offsets first, then the resize scale.
    boxes[:, [0, 2]] -= left
    boxes[:, [1, 3]] -= top
    boxes[:, :4] /= ratio
    if len(boxes) == 1:
        return boxes.flatten().tolist()
    return boxes.tolist()
def draw_bounding_boxes(image, outputs, save_path="output_image.jpg"):
    """Render labelled boxes onto a copy of *image* and save the result as JPEG.

    Args:
        image: BGR image (numpy array).
        outputs: list of {"box": [x1, y1, x2, y2], "label": str, ...} dicts.
        save_path: destination path for the annotated JPEG.

    Returns:
        The annotated image as an RGB PIL Image.
    """
    canvas = image.copy()
    # One fixed BGR color per section label.
    label_colors = {
        "Community": (0, 255, 0),
        "Contact": (0, 0, 255),
        "Education": (255, 128, 0),
        "Experience": (255, 0, 255),
        "Interests": (128, 128, 128),
        "Profile": (0, 0, 128),
        "Skills": (128, 0, 128),
    }
    for detection in outputs:
        x1, y1, x2, y2 = detection["box"]
        label = detection["label"]
        # Unknown labels fall back to white.
        color = label_colors.get(label, (255, 255, 255))
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
        cv2.putText(
            canvas,
            f"{label}",
            (x1, y1 - 10),
            cv2.FONT_ITALIC,
            2,
            color,
            2,
        )
    # OpenCV works in BGR; convert so PIL saves the colors correctly.
    annotated = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
    annotated.save(save_path, format="JPEG")
    return annotated