"""Segment a document image with a YOLOv8-seg ONNX model and OCR each region.

Pipeline: ``preprocess`` -> (ONNX inference, performed by the caller) ->
``postprocess`` (box decoding + non-maximum suppression) -> crop each detected
section -> Tesseract OCR -> aggregate the extracted text per section label.
"""

from typing import Tuple

import cv2
import numpy as np
from PIL import Image
from pytesseract import pytesseract

# Section labels, in the order the model was trained with.
class_names = [
    "Community",
    "Contact",
    "Education",
    "Experience",
    "Interests",
    "Profile",
    "Skills",
]

# Per-detection row layout: 4 box coords (xc, yc, w, h) followed by one score
# per class; everything after this index holds the 32 mask coefficients.
number_class_custom = len(class_names) + 4

# Module-level state written by preprocess() and read by postprocess().
# NOTE(review): this coupling makes the module non-reentrant — preprocess()
# must be called on the same image before postprocess().
img_width, img_height = None, None
left = None
top = None
ratio = None


def preprocess(img: np.ndarray, shape: Tuple[int, int] = (640, 640)) -> np.ndarray:
    """Letterbox-resize *img* to *shape* and return an NCHW float32 tensor.

    Args:
        img: BGR image as an HxWx3 uint8 array (OpenCV convention).
        shape: target (width, height) of the model input.

    Returns:
        Array of shape (1, 3, height, width), RGB, scaled to [0, 1].

    Side effects:
        Records the padded image size, scaling ratio and letterbox offsets in
        module globals so that postprocess() can map boxes back to the
        original image.
    """
    global img_width, img_height, left, top, ratio
    img, ratio, (left, top) = resize_and_pad(img, new_shape=shape)
    img_height, img_width, _ = img.shape
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    # Use the requested shape instead of a hard-coded 640x640 so the function
    # honours its `shape` parameter (shape is (width, height)).
    img = img.reshape(1, 3, shape[1], shape[0]).astype("float32")
    return img / 255.0


def extract_box(outputs):
    """Merge the detection head with the mask prototypes into one row matrix.

    Args:
        outputs: raw ONNX outputs —
            outputs[0]: (1, 4 + num_classes + 32, num_anchors) detections,
            outputs[1]: (1, 32, 160, 160) mask prototypes.

    Returns:
        Array with one row per anchor:
        [xc, yc, w, h, class scores..., flattened 160x160 mask].
    """
    detections = outputs[0][0].transpose()
    prototypes = outputs[1][0]
    boxes = detections[:, :number_class_custom]
    coeffs = detections[:, number_class_custom:]
    # Project the 32 mask coefficients onto the 160x160 prototype masks.
    # (The original reshaped the prototypes twice; once suffices.)
    masks = coeffs @ prototypes.reshape(32, 160 * 160)
    return np.hstack([boxes, masks])


def intersection(box1, box2):
    """Overlap area of two (x1, y1, x2, y2) boxes; 0 when they are disjoint."""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    # Clamp each extent at 0: without this, boxes disjoint on BOTH axes
    # multiply two negative extents and yield a spurious positive area,
    # corrupting IoU and therefore the NMS step.
    return max(0.0, x2 - x1) * max(0.0, y2 - y1)


def union(box1, box2):
    """Area of the union of two (x1, y1, x2, y2) boxes."""
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return area1 + area2 - intersection(box1, box2)


def iou(box1, box2):
    """Intersection-over-union of two boxes; 0 for degenerate (zero-area) pairs."""
    denom = union(box1, box2)
    if denom <= 0:
        # Guard against division by zero on zero-area boxes.
        return 0.0
    return intersection(box1, box2) / denom


def sigmoid(z):
    """Elementwise logistic sigmoid."""
    return 1 / (1 + np.exp(-z))


def get_mask(row, box, img_width, img_height, threshold):
    """Build a binary mask for one detection, cropped and resized to its box.

    Args:
        row: flattened 160x160 mask logits for this detection.
        box: (x1, y1, x2, y2) in model-input pixel coordinates.
        img_width, img_height: model input size used to scale box -> mask grid.
        threshold: sigmoid probability cut-off for mask pixels.

    Returns:
        uint8 array of shape (y2 - y1, x2 - x1) with values 0 or 255.
    """
    mask = sigmoid(row.reshape(160, 160))
    mask = (mask > threshold).astype("uint8") * 255
    x1, y1, x2, y2 = box
    # Map the box from input-pixel space onto the 160x160 mask grid.
    mask_x1 = round(x1 / img_width * 160)
    mask_y1 = round(y1 / img_height * 160)
    mask_x2 = round(x2 / img_width * 160)
    mask_y2 = round(y2 / img_height * 160)
    mask = mask[mask_y1:mask_y2, mask_x1:mask_x2]
    img_mask = Image.fromarray(mask, "L")
    img_mask = img_mask.resize((round(x2 - x1), round(y2 - y1)))
    return np.array(img_mask)


def get_polygon(mask):
    """Return the first contour of *mask* as a list of [x, y] points.

    Returns an empty list when the mask contains no contour.
    """
    # [-2] picks the contour list on both OpenCV 3.x (img, contours, hier)
    # and OpenCV 4.x (contours, hier) return conventions.
    contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not len(contours):
        # The original raised IndexError on an empty mask.
        return []
    return [[point[0][0], point[0][1]] for point in contours[0]]


def postprocess(outputs, threshold_confidence, threshold_iou):
    """Decode model outputs into labelled boxes in original-image coordinates.

    Args:
        outputs: raw ONNX outputs (see extract_box).
        threshold_confidence: minimum class score to keep a detection.
        threshold_iou: IoU above which overlapping boxes are suppressed.

    Returns:
        List of dicts {"box": [x1, y1, x2, y2], "label": str, "prob": float},
        with boxes mapped back through the letterbox transform recorded by
        preprocess().
    """
    objects = []
    for row in extract_box(outputs):
        xc, yc, w, h = row[:4]
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        prob = row[4:number_class_custom].max()
        if prob < threshold_confidence:
            continue
        class_id = row[4:number_class_custom].argmax()
        label = class_names[class_id]
        objects.append([x1, y1, x2, y2, label, prob])

    # Non-maximum suppression: greedily keep the highest-confidence box and
    # drop everything that overlaps it too much.
    objects.sort(key=lambda obj: obj[5], reverse=True)
    result = []
    while objects:
        best = objects.pop(0)
        result.append(best)
        objects = [other for other in objects if iou(other, best) < threshold_iou]

    return [
        {
            "box": list(map(int, unpad_and_resize_boxes(obj[:4], ratio, left, top))),
            "label": obj[4],
            # float, not int: int() truncated every confidence in [0, 1) to 0.
            "prob": float(obj[5]),
        }
        for obj in result
    ]


def extract_text_dict(outputs):
    """Aggregate OCR text per (lower-cased) label into a single dict."""
    result_dict = {}
    for output in outputs:
        label = output.get("label").lower()
        text = output.get("text")
        if label in result_dict:
            result_dict[label] += " " + text
        else:
            result_dict[label] = text
    return result_dict


def extract_text(outputs, image_origin):
    """OCR every detected region of *image_origin* and group text by label.

    Mutates each entry of *outputs* by adding a "text" key, then returns the
    label -> text mapping from extract_text_dict().
    """
    for output in outputs:
        region = crop_image(image_origin, output.get("box"))
        text = pytesseract.image_to_string(region)
        # Fix: the original first set "text" and then unconditionally appended
        # the same string again, duplicating every OCR result.
        output["text"] = text
    return extract_text_dict(outputs)


def crop_image(image, box):
    """Crop *image* to the integer (x1, y1, x2, y2) region *box*."""
    x1, y1, x2, y2 = map(int, box)
    return image[y1:y2, x1:x2]


def resize_and_pad(
    image: np.ndarray,
    new_shape: Tuple[int, int],
    padding_color: Tuple[int] = (144, 144, 144),
) -> np.ndarray:
    """Letterbox *image* into *new_shape* preserving its aspect ratio.

    Args:
        image: HxW(x3) source image.
        new_shape: target (width, height).
        padding_color: border fill colour.

    Returns:
        (padded image, scale ratio, (left pad, top pad)).
    """
    h_org, w_org = image.shape[:2]
    w_new, h_new = new_shape
    padd_left, padd_right, padd_top, padd_bottom = 0, 0, 0, 0

    if h_org >= w_org:
        # Portrait (or square): fit the height, pad left/right.
        img_resize = cv2.resize(image, (int(w_org * h_new / h_org), h_new))
        h, w = img_resize.shape[:2]
        padd_left = (w_new - w) // 2
        padd_right = w_new - w - padd_left
        ratio = h_new / h_org
    else:
        # Landscape: fit the width, pad top/bottom.
        img_resize = cv2.resize(image, (w_new, int(h_org * w_new / w_org)))
        h, w = img_resize.shape[:2]
        padd_top = (h_new - h) // 2
        padd_bottom = h_new - h - padd_top
        ratio = w_new / w_org

    image = cv2.copyMakeBorder(
        img_resize,
        padd_top,
        padd_bottom,
        padd_left,
        padd_right,
        cv2.BORDER_CONSTANT,
        None,
        value=padding_color,
    )
    return image, ratio, (padd_left, padd_top)


def unpad_and_resize_boxes(boxes, ratio, left, top):
    """Undo the letterbox transform, mapping boxes back to source coordinates.

    Args:
        boxes: one (x1, y1, x2, y2) box or an (N, 4) array of boxes in
            letterboxed coordinates.
        ratio: scale factor returned by resize_and_pad.
        left, top: letterbox offsets returned by resize_and_pad.

    Returns:
        The transformed box(es) as a flat list (single box) or list of lists.
    """
    if len(boxes) == 0:
        return boxes
    # Force float dtype: the in-place division below would raise on an
    # integer input array (unsafe cast).
    boxes = np.array(boxes, dtype=float)
    if boxes.ndim == 1:
        boxes = boxes.reshape(-1, 4)
    boxes[:, [0, 2]] -= left
    boxes[:, [1, 3]] -= top
    boxes[:, :4] /= ratio
    if len(boxes) == 1:
        return boxes.flatten().tolist()
    return boxes.tolist()


def draw_bounding_boxes(image, outputs, save_path="output_image.jpg"):
    """Draw labelled detection boxes on a copy of *image* and save it as JPEG.

    Args:
        image: BGR source image.
        outputs: detections from postprocess() ({"box", "label", ...} dicts).
        save_path: destination file path.

    Returns:
        The annotated image as a PIL RGB image.
    """
    image_with_boxes = image.copy()

    # One fixed BGR colour per known label; unknown labels fall back to white.
    label_colors = {
        "Community": (0, 255, 0),
        "Contact": (0, 0, 255),
        "Education": (255, 128, 0),
        "Experience": (255, 0, 255),
        "Interests": (128, 128, 128),
        "Profile": (0, 0, 128),
        "Skills": (128, 0, 128),
    }

    for output in outputs:
        box = output["box"]
        label = output["label"]
        color = label_colors.get(label, (255, 255, 255))
        x1, y1, x2, y2 = box
        cv2.rectangle(image_with_boxes, (x1, y1), (x2, y2), color, 2)
        cv2.putText(
            image_with_boxes,
            f"{label}",
            (x1, y1 - 10),
            cv2.FONT_ITALIC,
            2,
            color,
            2,
        )

    image_with_boxes_rgb = cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_with_boxes_rgb)
    image_pil.save(save_path, format="JPEG")
    return image_pil