# CV_parsing/src/utils/utils_segment.py
# Source: ABAO77's repository, commit 930ec8d ("feat: good code")
from PIL import Image
import numpy as np
import cv2
from typing import Tuple
from pytesseract import pytesseract
# Section labels the segmentation model predicts, in class-index order.
class_names = [
    "Community",
    "Contact",
    "Education",
    "Experience",
    "Interests",
    "Profile",
    "Skills",
]
# Columns per detection row before the mask coefficients start:
# 4 box values (xc, yc, w, h) + one score per class.
number_class_custom = int(len(class_names) + 4)
# Letterbox state shared between preprocess() and postprocess();
# populated by preprocess() before any detection is decoded.
img_width, img_height = None, None
left = None
top = None
ratio = None
def preprocess(img: np.ndarray, shape=(640, 640)) -> np.ndarray:
    """Letterbox *img* to *shape* and convert to NCHW float32 in [0, 1].

    Args:
        img: BGR image (H, W, 3) as read by cv2.
        shape: target (width, height) for the model input.

    Returns:
        float32 array of shape (1, 3, H, W), RGB, scaled to [0, 1].

    Side effects: stores the letterbox mapping (img_width, img_height,
    left, top, ratio) in module globals for postprocess() to undo later.
    """
    global img_width, img_height, left, top, ratio
    img, ratio, (left, top) = resize_and_pad(img, new_shape=shape)
    img_height, img_width, _ = img.shape
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # HWC -> CHW, then add a batch dimension. Use the actual padded size
    # rather than the original hard-coded 640 so non-default shapes work.
    img = img.transpose(2, 0, 1)
    img = img.reshape(1, 3, img_height, img_width).astype("float32")
    img = img / 255.0
    return img
def extract_box(outputs):
    """Combine the detection head with the mask prototypes.

    Args:
        outputs: raw model outputs; outputs[0] is the detection head,
            outputs[1] the (32, 160, 160) mask prototype tensor.

    Returns:
        Array of rows [xc, yc, w, h, class scores..., flat 160*160 mask].
    """
    detections = outputs[0][0].transpose()  # one row per candidate box
    # Flatten the 32 prototype masks once (the original reshaped twice,
    # a harmless but redundant duplicate statement).
    protos = outputs[1][0].reshape(32, 160 * 160)
    boxes = detections[:, 0:number_class_custom]
    mask_coeffs = detections[:, number_class_custom:]
    # Each detection's mask is a linear combination of the prototypes.
    masks = mask_coeffs @ protos
    return np.hstack([boxes, masks])
def intersection(box1, box2):
    """Return the overlap area of two (x1, y1, x2, y2) boxes; 0 if disjoint."""
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    # Clamp each side to zero: for disjoint boxes both deltas go negative
    # and the unclamped product would be spuriously positive, making NMS
    # see phantom overlap.
    return max(0, x2 - x1) * max(0, y2 - y1)
def union(box1, box2):
    """Return the union area of two (x1, y1, x2, y2) boxes."""
    a_x1, a_y1, a_x2, a_y2 = box1[:4]
    b_x1, b_y1, b_x2, b_y2 = box2[:4]
    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
    area_b = (b_x2 - b_x1) * (b_y2 - b_y1)
    # Inclusion-exclusion: total area minus the double-counted overlap.
    return area_a + area_b - intersection(box1, box2)
def iou(box1, box2):
    """Return intersection-over-union of two boxes; 0.0 when the union is empty."""
    u = union(box1, box2)
    # Degenerate (zero-area) boxes would otherwise raise ZeroDivisionError.
    return intersection(box1, box2) / u if u else 0.0
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^-z)."""
    return np.reciprocal(1.0 + np.exp(-z))
def get_mask(row, box, img_width, img_height, threshold):
    """Return the box-sized binary mask (uint8, values 0/255) for one detection.

    Args:
        row: flattened 160*160 mask logits for this detection.
        box: (x1, y1, x2, y2) in image coordinates.
        img_width, img_height: size of the image the box refers to.
        threshold: sigmoid probability cutoff for a pixel to be "on".
    """
    x1, y1, x2, y2 = box
    proto = row.reshape(160, 160)
    proto = 1.0 / (1.0 + np.exp(-proto))  # sigmoid, inlined
    binary = np.where(proto > threshold, 255, 0).astype("uint8")
    # Map the box from image coordinates into the 160x160 prototype grid.
    gx1 = round(x1 / img_width * 160)
    gy1 = round(y1 / img_height * 160)
    gx2 = round(x2 / img_width * 160)
    gy2 = round(y2 / img_height * 160)
    crop = binary[gy1:gy2, gx1:gx2]
    # Upscale the cropped grid cells back to the box size in image pixels.
    resized = Image.fromarray(crop, "L").resize((round(x2 - x1), round(y2 - y1)))
    return np.array(resized)
def get_polygon(mask):
    """Return the first contour of *mask* as a list of [x, y] points."""
    contours, _hierarchy = cv2.findContours(
        mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    polygon = []
    # Each contour point has shape (1, 2): [[x, y]].
    for point in contours[0]:
        polygon.append([point[0][0], point[0][1]])
    return polygon
def postprocess(outputs, threshold_confidence, threshold_iou):
    """Decode raw model outputs into labelled boxes in original-image coordinates.

    Args:
        outputs: raw model outputs, as consumed by extract_box().
        threshold_confidence: minimum class score required to keep a detection.
        threshold_iou: IoU above which a lower-scoring overlapping box is dropped.

    Returns:
        List of {"box": [x1, y1, x2, y2] (ints), "label": str, "prob": float}.

    Relies on the module globals set by preprocess() (img_width, img_height,
    ratio, left, top) to undo the letterbox transform.
    """
    candidates = []
    for row in extract_box(outputs):
        xc, yc, w, h = row[:4]
        # Scale model (640x640) center/size coords to padded-image corners.
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        class_scores = row[4:number_class_custom]
        prob = class_scores.max()
        if prob < threshold_confidence:
            continue
        label = class_names[class_scores.argmax()]
        candidates.append([x1, y1, x2, y2, label, prob])
    # Greedy non-maximum suppression: take the best remaining box, drop
    # everything that overlaps it beyond threshold_iou, repeat.
    candidates.sort(key=lambda obj: obj[5], reverse=True)
    kept = []
    while candidates:
        best = candidates.pop(0)
        kept.append(best)
        candidates = [
            other for other in candidates if iou(other, best) < threshold_iou
        ]
    return [
        {
            "box": list(map(int, unpad_and_resize_boxes(obj[:4], ratio, left, top))),
            "label": obj[4],
            # BUG FIX: int(obj[5]) truncated every probability in [0, 1) to 0.
            "prob": float(obj[5]),
        }
        for obj in kept
    ]
def extract_text_dict(outputs):
    """Merge OCR results into {lowercased label: text}, joining duplicates with a space."""
    merged = {}
    for item in outputs:
        key = item.get("label").lower()
        value = item.get("text")
        if key not in merged:
            merged[key] = value
        else:
            merged[key] = merged[key] + " " + value
    return merged
def extract_text(outputs, image_origin):
    """Run OCR on each detected region and return the merged {label: text} dict.

    Args:
        outputs: list of {"box": [...], "label": ...} dicts from postprocess().
        image_origin: the original (unpadded) image to crop regions from.

    Side effects: mutates each entry of *outputs* by adding a "text" key.
    """
    for output in outputs:
        region = crop_image(image_origin, output.get("box"))
        text = pytesseract.image_to_string(region)
        # BUG FIX: the original unconditionally set "text" and then
        # appended the same string again, duplicating every OCR result.
        if "text" in output:
            output["text"] += text
        else:
            output["text"] = text
    return extract_text_dict(outputs)
def crop_image(image, box):
    """Return the sub-array of *image* bounded by box = (x1, y1, x2, y2)."""
    x1, y1, x2, y2 = (int(coord) for coord in box)
    return image[y1:y2, x1:x2]
def resize_and_pad(
    image: np.ndarray,
    new_shape: Tuple[int, int],
    padding_color: Tuple[int, int, int] = (144, 144, 144),
) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """Resize *image* to fit *new_shape* preserving aspect ratio, then letterbox-pad.

    Args:
        image: BGR image (H, W, C).
        new_shape: target (width, height).
        padding_color: BGR fill for the letterbox border.

    Returns:
        (padded_image, scale_ratio, (pad_left, pad_top)) — everything
        unpad_and_resize_boxes() needs to map boxes back to the original.
    """
    h_org, w_org = image.shape[:2]
    w_new, h_new = new_shape
    padd_left, padd_right, padd_top, padd_bottom = 0, 0, 0, 0
    if h_org >= w_org:
        # Portrait (or square): fit the height, pad left/right.
        img_resize = cv2.resize(image, (int(w_org * h_new / h_org), h_new))
        h, w = img_resize.shape[:2]
        padd_left = (w_new - w) // 2
        padd_right = w_new - w - padd_left
        ratio = h_new / h_org
    else:
        # Landscape: fit the width, pad top/bottom. (The original used a
        # second independent `if` with the complementary condition; `else`
        # makes the mutual exclusion explicit and keeps ratio/img_resize
        # provably bound.)
        img_resize = cv2.resize(image, (w_new, int(h_org * w_new / w_org)))
        h, w = img_resize.shape[:2]
        padd_top = (h_new - h) // 2
        padd_bottom = h_new - h - padd_top
        ratio = w_new / w_org
    image = cv2.copyMakeBorder(
        img_resize,
        padd_top,
        padd_bottom,
        padd_left,
        padd_right,
        cv2.BORDER_CONSTANT,
        None,
        value=padding_color,
    )
    return image, ratio, (padd_left, padd_top)
def unpad_and_resize_boxes(boxes, ratio, left, top):
    """Map boxes from letterboxed coordinates back to the original image.

    Args:
        boxes: one box [x1, y1, x2, y2] or a list of such boxes.
        ratio: scale factor returned by resize_and_pad().
        left, top: padding offsets returned by resize_and_pad().

    Returns:
        [] for empty input; a flat [x1, y1, x2, y2] list for a single box;
        otherwise a list of per-box lists.
    """
    if len(boxes) == 0:
        return boxes
    # BUG FIX: force float dtype. With integer input, np.array() creates an
    # int array and the in-place `/= ratio` below raises a casting TypeError.
    boxes = np.asarray(boxes, dtype=float)
    if boxes.ndim == 1:
        boxes = boxes.reshape(-1, 4)
    # Undo the padding offsets first, then the resize scale.
    boxes[:, [0, 2]] -= left
    boxes[:, [1, 3]] -= top
    boxes[:, :4] /= ratio
    if len(boxes) == 1:
        return boxes.flatten().tolist()
    return boxes.tolist()
def draw_bounding_boxes(image, outputs, save_path="output_image.jpg"):
    """Render labelled boxes onto a copy of *image* and save the result as JPEG.

    Args:
        image: BGR image (numpy array).
        outputs: list of {"box": [x1, y1, x2, y2], "label": str, ...} dicts.
        save_path: destination path for the annotated JPEG.

    Returns:
        The annotated image as an RGB PIL Image.
    """
    canvas = image.copy()
    # One fixed BGR color per section label.
    label_colors = {
        "Community": (0, 255, 0),
        "Contact": (0, 0, 255),
        "Education": (255, 128, 0),
        "Experience": (255, 0, 255),
        "Interests": (128, 128, 128),
        "Profile": (0, 0, 128),
        "Skills": (128, 0, 128),
    }
    for detection in outputs:
        x1, y1, x2, y2 = detection["box"]
        label = detection["label"]
        # Unknown labels fall back to white.
        color = label_colors.get(label, (255, 255, 255))
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
        cv2.putText(
            canvas,
            f"{label}",
            (x1, y1 - 10),
            cv2.FONT_ITALIC,
            2,
            color,
            2,
        )
    # OpenCV works in BGR; convert so PIL saves the colors correctly.
    annotated = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
    annotated.save(save_path, format="JPEG")
    return annotated