Spaces:

ABAO77
/

CV_parsing

Sleeping

File size: 7,772 Bytes

from PIL import Image
import numpy as np
import cv2
from typing import Tuple
from pytesseract import pytesseract

# Section labels the detector can emit; postprocess() uses the index of the
# highest class score to look the label up in this list.
class_names = [
    "Community",
    "Contact",
    "Education",
    "Experience",
    "Interests",
    "Profile",
    "Skills",
]

# Number of leading columns in a prediction row that describe the box:
# 4 box values (xc, yc, w, h) followed by one score per class.
number_class_custom = len(class_names) + 4

# Letterbox state shared between preprocess() and postprocess().
# preprocess() assigns all five; they stay None until it has run.
img_width = img_height = None
left = top = ratio = None


def preprocess(img: np.ndarray, shape=(640, 640)) -> np.ndarray:
    """Letterbox an image and turn it into a normalized NCHW float32 batch.

    The image is resized and padded to ``shape`` with ``resize_and_pad``,
    converted from BGR to RGB, moved to channel-first layout, given a
    leading batch axis of size 1 and scaled to the [0, 1] range.

    Side effect: the module globals ``img_width``, ``img_height``, ``left``,
    ``top`` and ``ratio`` are overwritten with the padded image size, the
    padding offsets and the resize ratio so that ``postprocess`` can map
    boxes back to the original image.

    Args:
        img: Three-channel image array (treated as BGR by the color
            conversion below).
        shape: Target size forwarded to ``resize_and_pad`` as ``new_shape``.

    Returns:
        Array of shape ``(1, 3, img_height, img_width)`` and dtype float32.
    """
    global img_width, img_height, left, top, ratio
    img, ratio, (left, top) = resize_and_pad(img, new_shape=shape)
    img_height, img_width, _ = img.shape
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.transpose(2, 0, 1)
    # Use the actual padded size instead of a hard-coded 640x640 so that a
    # non-default ``shape`` does not make the reshape fail.
    img = img.reshape(1, 3, img_height, img_width).astype("float32")
    return img / 255.0


def extract_box(outputs):
    """Combine the two model outputs into one row per candidate detection.

    Args:
        outputs: Sequence whose first element holds the per-candidate
            predictions and whose second element holds the mask prototypes
            (presumably the two heads of a YOLO segmentation model; confirm
            against the caller). Only batch item 0 of each is used.

    Returns:
        2-D array with one row per candidate. The first
        ``number_class_custom`` columns are the box values and class scores;
        the remaining columns are the flattened mask for that candidate,
        obtained by multiplying its mask coefficients with the prototypes.
    """
    predictions = outputs[0][0].transpose()
    prototypes = outputs[1][0]

    boxes = predictions[:, :number_class_custom]
    mask_coefficients = predictions[:, number_class_custom:]

    # Flatten every prototype to a single row. Deriving the sizes from the
    # array itself replaces the previous hard-coded (32, 160 * 160) reshape,
    # which was also executed twice.
    prototypes = prototypes.reshape(prototypes.shape[0], -1)

    return np.hstack([boxes, mask_coefficients @ prototypes])


def intersection(box1, box2):
    """Return the overlap area of two axis-aligned boxes.

    Args:
        box1: Sequence whose first four items are ``x1, y1, x2, y2``.
        box2: Sequence in the same layout as ``box1``.

    Returns:
        Area of the overlapping rectangle, or 0 when the boxes do not
        overlap.
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    overlap_w = min(box1_x2, box2_x2) - max(box1_x1, box2_x1)
    overlap_h = min(box1_y2, box2_y2) - max(box1_y1, box2_y1)
    # Disjoint boxes give a negative extent on at least one axis. Without
    # this guard the product is negative, or even positive when both extents
    # are negative, which would report an overlap that does not exist.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0
    return overlap_w * overlap_h


def union(box1, box2):
    """Return the area covered by either of two axis-aligned boxes.

    Both arguments are sequences whose first four items are
    ``x1, y1, x2, y2``. The result is the sum of the two box areas minus
    whatever ``intersection`` reports for the pair.
    """

    def _area(box):
        x_min, y_min, x_max, y_max = box[:4]
        return (x_max - x_min) * (y_max - y_min)

    return _area(box1) + _area(box2) - intersection(box1, box2)


def iou(box1, box2):
    """Return the intersection-over-union ratio of two boxes.

    Both arguments are sequences whose first four items are
    ``x1, y1, x2, y2``.

    Returns:
        ``intersection(box1, box2) / union(box1, box2)``, or 0.0 when the
        union is not positive (for example two zero-area boxes), which would
        otherwise be a division by zero.
    """
    total_area = union(box1, box2)
    if total_area <= 0:
        return 0.0
    return intersection(box1, box2) / total_area


def sigmoid(z):
    """Apply the logistic function ``1 / (1 + e**-z)`` to a scalar or array."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


def get_mask(row, box, img_width, img_height, threshold):
    """Build the binary mask of one detection, sized to its bounding box.

    Args:
        row: Flat array of 160 * 160 raw mask values for the detection.
        box: ``(x1, y1, x2, y2)`` of the detection in image coordinates.
        img_width: Width used to scale x coordinates onto the 160-wide grid.
        img_height: Height used to scale y coordinates onto the 160-high grid.
        threshold: Sigmoid value above which a pixel counts as foreground.

    Returns:
        uint8 array holding the box-sized mask, produced by resizing the
        0/255 crop of the mask grid to the rounded box width and height.
    """
    probabilities = sigmoid(row.reshape(160, 160))
    binary = (probabilities > threshold).astype("uint8") * 255

    x1, y1, x2, y2 = box
    # Project the box corners from image coordinates onto the 160x160 grid.
    col_start = round(x1 / img_width * 160)
    row_start = round(y1 / img_height * 160)
    col_stop = round(x2 / img_width * 160)
    row_stop = round(y2 / img_height * 160)
    window = binary[row_start:row_stop, col_start:col_stop]

    # Stretch the cropped grid cells back to the pixel size of the box.
    target_size = (round(x2 - x1), round(y2 - y1))
    return np.array(Image.fromarray(window, "L").resize(target_size))


def get_polygon(mask):
    """Return the outline of the first contour found in a binary mask.

    Args:
        mask: Single-channel image array passed to ``cv2.findContours``.

    Returns:
        List of ``[x, y]`` points of the first contour, or an empty list
        when the mask contains no contour at all.
    """
    found = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4 returns (contours, hierarchy) while OpenCV 3 returns
    # (image, contours, hierarchy); the contours are second from the end in
    # both layouts.
    contours = found[-2]
    if len(contours) == 0:
        return []
    return [[point[0][0], point[0][1]] for point in contours[0]]


def postprocess(outputs, threshold_confidence, threshold_iou):
    """Turn raw model outputs into labelled boxes in original-image pixels.

    Candidates below ``threshold_confidence`` are discarded, the rest go
    through greedy non-maximum suppression, and the surviving boxes are
    mapped back from the letterboxed input to the original image using the
    module globals ``ratio``, ``left`` and ``top``.

    ``preprocess`` must have run first: it sets ``img_width``,
    ``img_height``, ``ratio``, ``left`` and ``top``, all of which are read
    here.

    Args:
        outputs: Raw model outputs, forwarded to ``extract_box``.
        threshold_confidence: Minimum best-class score for a candidate.
        threshold_iou: A candidate is dropped when its IoU with an already
            kept, higher-scoring box is not below this value.

    Returns:
        List of dicts with keys ``"box"`` (``[x1, y1, x2, y2]`` as ints),
        ``"label"`` (entry of ``class_names``) and ``"prob"`` (best class
        score as a float), ordered by descending score.
    """
    candidates = []
    for row in extract_box(outputs):
        class_scores = row[4:number_class_custom]
        prob = class_scores.max()
        if prob < threshold_confidence:
            continue
        xc, yc, w, h = row[:4]
        # Center/size -> corner coordinates. The 640 divisor matches the
        # default preprocess() shape, for which this rescale is a no-op.
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        label = class_names[class_scores.argmax()]
        candidates.append([x1, y1, x2, y2, label, prob])

    # Greedy non-maximum suppression: visit candidates from best to worst
    # and keep one only if it overlaps no previously kept box too much.
    candidates.sort(key=lambda item: item[5], reverse=True)
    kept = []
    for candidate in candidates:
        if all(iou(candidate, winner) < threshold_iou for winner in kept):
            kept.append(candidate)

    return [
        {
            "box": list(
                map(int, unpad_and_resize_boxes(obj[:4], ratio, left, top))
            ),
            "label": obj[4],
            # Keep the score as a float; casting it to int truncated every
            # confidence below 1.0 to 0.
            "prob": float(obj[5]),
        }
        for obj in kept
    ]


def extract_text_dict(outputs):
    """Merge recognized text per section label.

    Args:
        outputs: Iterable of dicts carrying a ``"label"`` and a ``"text"``
            entry.

    Returns:
        Dict mapping each lower-cased label to its text. Texts that share a
        label are joined with a single space, in the order they appear.
    """
    merged = {}
    for entry in outputs:
        key = entry.get("label").lower()
        chunk = entry.get("text")
        if key not in merged:
            merged[key] = chunk
            continue
        merged[key] = merged[key] + " " + chunk
    return merged


def extract_text(outputs, image_origin):
    """Run OCR on every detected box and group the text by label.

    Each dict in ``outputs`` is updated in place: its ``"text"`` entry is
    set to the string Tesseract returns for the region cropped from
    ``image_origin`` at the dict's ``"box"``.

    Args:
        outputs: List of detection dicts with ``"box"`` and ``"label"``.
        image_origin: Image array the boxes refer to.

    Returns:
        The mapping produced by ``extract_text_dict`` for ``outputs``.
    """
    for output in outputs:
        region = crop_image(image_origin, output.get("box"))
        # Store the OCR result exactly once. The previous code wrote the
        # text and then appended it again, so every string was doubled.
        output["text"] = pytesseract.image_to_string(region)
    return extract_text_dict(outputs)


def crop_image(image, box):
    """Cut the region described by ``box`` out of ``image``.

    Args:
        image: Array indexed as ``image[row, column]``.
        box: ``(x1, y1, x2, y2)``; values are truncated to ints.

    Returns:
        The slice ``image[y1:y2, x1:x2]``. Negative coordinates are clamped
        to 0 so that a box reaching past the top or left edge is cut at the
        border instead of wrapping around as a negative index.
    """
    x1, y1, x2, y2 = (max(0, int(value)) for value in box)
    return image[y1:y2, x1:x2]


def resize_and_pad(
    image: np.ndarray,
    new_shape: Tuple[int, int],
    padding_color: Tuple[int, int, int] = (144, 144, 144),
) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """Resize an image to fit ``new_shape`` and pad the rest (letterbox).

    The aspect ratio is preserved: the image is scaled until one side
    matches the target, then the other side is padded equally on both ends
    with ``padding_color``.

    Args:
        image: Image array whose first two axes are height and width.
        new_shape: Target size as ``(width, height)``.
        padding_color: Border color passed to ``cv2.copyMakeBorder``.

    Returns:
        Tuple ``(padded_image, ratio, (pad_left, pad_top))`` where ``ratio``
        is the factor the original image was scaled by.
    """
    h_org, w_org = image.shape[:2]
    w_new, h_new = new_shape

    # Scale by height when the height-scaled width still fits the target
    # width. For a square target this is the same test as
    # ``h_org >= w_org``, and it also picks the fitting side for
    # non-square targets.
    if w_org * h_new <= w_new * h_org:
        ratio = h_new / h_org
        resized_size = (max(1, int(w_org * h_new / h_org)), h_new)
    else:
        ratio = w_new / w_org
        resized_size = (w_new, max(1, int(h_org * w_new / w_org)))
    # max(1, ...) above keeps extremely thin images from collapsing to a
    # zero-sized side, which cv2.resize rejects.

    img_resize = cv2.resize(image, resized_size)
    h, w = img_resize.shape[:2]

    # Only the side that does not fill the target has non-zero padding.
    padd_left = (w_new - w) // 2
    padd_right = w_new - w - padd_left
    padd_top = (h_new - h) // 2
    padd_bottom = h_new - h - padd_top

    image = cv2.copyMakeBorder(
        img_resize,
        padd_top,
        padd_bottom,
        padd_left,
        padd_right,
        cv2.BORDER_CONSTANT,
        None,
        value=padding_color,
    )

    return image, ratio, (padd_left, padd_top)


def unpad_and_resize_boxes(boxes, ratio, left, top):
    """Map boxes from the letterboxed image back to the original image.

    Args:
        boxes: One box ``[x1, y1, x2, y2]`` or a sequence of such boxes.
        ratio: Scale factor that was applied to the original image.
        left: Padding added on the left of the resized image.
        top: Padding added on the top of the resized image.

    Returns:
        ``boxes`` unchanged when it is empty; a flat list of four floats
        when there is exactly one box; otherwise a list of four-float lists.
        The input is never modified.
    """
    if len(boxes) == 0:
        return boxes

    coords = np.array(boxes)
    # The in-place division below cannot write float results into an
    # integer array, so promote non-float input first. Float inputs keep
    # their dtype.
    if not np.issubdtype(coords.dtype, np.floating):
        coords = coords.astype(float)
    if coords.ndim == 1:
        coords = coords.reshape(-1, 4)

    coords[:, [0, 2]] -= left
    coords[:, [1, 3]] -= top
    coords[:, :4] /= ratio

    if len(coords) == 1:
        return coords.flatten().tolist()
    return coords.tolist()


def draw_bounding_boxes(image, outputs, save_path="output_image.jpg"):
    """Draw labelled boxes on a copy of ``image`` and save it as a JPEG.

    Args:
        image: Image array; it is copied, so the caller's array is left
            untouched. The final conversion treats it as BGR.
        outputs: Iterable of dicts with a ``"box"`` (``x1, y1, x2, y2``)
            and a ``"label"`` entry.
        save_path: Where the annotated JPEG is written.

    Returns:
        The annotated picture as a PIL image (RGB).
    """
    # One color per known label, applied to the array before the
    # BGR -> RGB conversion; unknown labels are drawn in white.
    fallback_color = (255, 255, 255)
    palette = {
        "Community": (0, 255, 0),
        "Contact": (0, 0, 255),
        "Education": (255, 128, 0),
        "Experience": (255, 0, 255),
        "Interests": (128, 128, 128),
        "Profile": (0, 0, 128),
        "Skills": (128, 0, 128),
    }

    canvas = image.copy()
    for detection in outputs:
        x1, y1, x2, y2 = detection["box"]
        label = detection["label"]
        color = palette.get(label, fallback_color)

        # Box outline first, then the label slightly above its top-left corner.
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
        cv2.putText(
            canvas,
            f"{label}",
            (x1, y1 - 10),
            cv2.FONT_ITALIC,
            2,
            color,
            2,
        )

    # PIL expects RGB channel order, so convert before wrapping the array.
    rendered = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
    rendered.save(save_path, format="JPEG")
    return rendered