| """ |
| Inference script for CLPRNet with PARSeq Tiny backbone. |
| |
| Two-stage inference: |
| 1. Detection: CLPRNet backbone + detection head -> boxes (with NMS) |
| 2. Recognition: Crop detected plates -> PARSeq Tiny -> plate strings |
| """ |
|
|
| from model_parseq import CLPRNetPARSeq, Tokenizer |
| import torch |
| import torchvision.transforms as transforms |
| import torch.nn.functional as F |
| import os |
| import numpy as np |
| import cv2 |
| from PIL import Image, ImageDraw, ImageFont |
|
|
|
|
# Character set taken from the PARSeq tokenizer.
CHARACTER = Tokenizer.CHARSET

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Resolution every padded image is resized to before it is fed to the model.
img_size = (1024, 1024)

# Font used for the plate labels drawn on the output images.
font_size = 30
try:
    font = ImageFont.truetype('resource/msyh.ttc', font_size, encoding='utf-8')
except (OSError, ImportError):
    # Best-effort fallback when the font file is missing/unreadable or Pillow
    # was built without FreeType.  Only these errors are caught so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    font = ImageFont.load_default()

# Build the network, move it to the selected device and load the weights.
model = CLPRNetPARSeq(max_label_length=8)
model = model.to(DEVICE)
# NOTE(review): torch.load unpickles the checkpoint, so only load weight files
# from a trusted source (consider weights_only=True where the installed
# PyTorch version supports it).
model.load_state_dict(torch.load('resource/CLPRNet_PARSeq.pth', map_location=DEVICE))
model.eval()

# Annotated images are written here; exist_ok avoids the check-then-create race.
os.makedirs('output', exist_ok=True)

# Preprocessing applied to the resized RGB array: to tensor, then
# normalisation with the mean/std values listed below.
tran = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
|
|
|
|
def IOU(box, other_boxes):
    """Return the intersection-over-union of ``box`` with each row of ``other_boxes``.

    Args:
        box: 1-D tensor indexed as ``[x1, y1, x2, y2]``.
        other_boxes: 2-D tensor whose columns 0..3 are ``x1, y1, x2, y2``.

    Returns:
        A 1-D tensor with one IoU value per row of ``other_boxes``.
    """
    lefts, tops, rights, bottoms = (other_boxes[:, k] for k in range(4))

    # Negative extents mean the boxes do not overlap; floor them at zero.
    floor = torch.zeros(1, device=box.device)
    inter_w = torch.max(floor, torch.min(box[2], rights) - torch.max(box[0], lefts))
    inter_h = torch.max(floor, torch.min(box[3], bottoms) - torch.max(box[1], tops))
    intersection = inter_w * inter_h

    ref_area = (box[2] - box[0]) * (box[3] - box[1])
    row_areas = (rights - lefts) * (bottoms - tops)
    union = ref_area + row_areas - intersection

    # The small epsilon keeps the division defined for degenerate boxes.
    return intersection / (union + 1e-6)
|
|
|
|
def NMS(boxes, C=0.3):
    """Greedy non-maximum suppression.

    Args:
        boxes: 2-D tensor; column 0 is the value rows are ranked by and
            columns 1..4 are the box coordinates handed to ``IOU``.
        C: rows whose IoU with an already kept row is not below this
            threshold are discarded.

    Returns:
        An empty list when ``boxes`` is empty, otherwise a tensor stacking the
        kept rows in descending order of column 0.
    """
    if len(boxes) == 0:
        return []

    ranking = torch.argsort(boxes[:, 0], descending=True)
    pending = boxes[ranking]
    survivors = []
    while len(pending) > 0:
        best, rest = pending[0], pending[1:]
        survivors.append(best)
        if len(rest) == 0:
            break
        # Only rows that overlap the kept row by less than C stay in play.
        overlaps = IOU(best[1:5], rest[:, 1:5])
        pending = rest[overlaps < C]
    return torch.stack(survivors)
|
|
|
|
def _letterbox(image):
    """Centre ``image`` on a black square canvas and resize it to ``img_size``.

    Returns:
        ``(resized, size, pad_x, pad_y)`` where ``size`` is the side of the
        square canvas and ``pad_x`` / ``pad_y`` are the offsets of the original
        image inside that canvas.
    """
    height, width = image.shape[:2]
    size = max(height, width)
    # Only the shorter side gets a non-zero offset.
    pad_x = (size - width) // 2
    pad_y = (size - height) // 2
    canvas = np.zeros((size, size, 3), dtype=np.uint8)
    canvas[pad_y:pad_y + height, pad_x:pad_x + width, :] = image
    return cv2.resize(canvas, img_size), size, pad_x, pad_y


def _annotate_and_save(org_img, img_name, boxes, texts, scores):
    """Draw boxes and ``text_score`` labels on ``org_img`` and save it under ``output/``."""
    for box in boxes:
        # Plain int tuples are used for the corner points instead of numpy
        # slices, which not every OpenCV build is assumed to accept.
        cv2.rectangle(
            org_img,
            (int(box[0]), int(box[1])),
            (int(box[2]), int(box[3])),
            (0, 0, 255),
            2,
        )

    # Reverse the channel order (BGR -> RGB) before handing the array to PIL.
    org_img_pil = Image.fromarray(org_img[:, :, ::-1].astype('uint8')).convert('RGB')
    draw = ImageDraw.Draw(org_img_pil)

    for box, text, score in zip(boxes, texts, scores):
        left, top = int(box[0]), int(box[1])
        label_text = f"{text}_{score}"
        label_size = int(draw.textlength(label_text, font))
        # Red background strip directly above the top-left corner of the box.
        draw.rectangle(
            [(left, top - font_size), (left + label_size, top)],
            fill='red',
        )
        draw.text(
            xy=(left, top - int(font_size * 1.25)),
            text=label_text,
            fill=(255, 255, 255),
            font=font,
        )
        print(f" Plate: {text}, Conf: {score}")

    org_img_pil.save(os.path.join('output', img_name))


def inference(src, image_list):
    """Detect and read licence plates in the given images.

    For every file name in ``image_list`` (resolved relative to ``src``) the
    image is letterboxed to ``img_size``, passed through ``model``, the
    detections are thresholded and filtered with ``NMS``, the surviving boxes
    are sent to ``model.recognize_plates`` and the annotated image is written
    to the ``output`` directory.  Files that OpenCV cannot read are skipped,
    and nothing is saved for an image without detections.

    Args:
        src: directory that contains the images.
        image_list: iterable of file names inside ``src``.
    """
    # Pixel coordinates of the centre of every cell of the 64x64 output grid.
    grid = 64
    centres_x = (np.arange(grid) + 0.5) * img_size[0] / grid
    centres_y = (np.arange(grid) + 0.5) * img_size[1] / grid
    x_mask = torch.from_numpy(np.tile(centres_x, (grid, 1))).to(DEVICE).unsqueeze(dim=2)
    y_mask = torch.from_numpy(np.tile(centres_y[:, None], (1, grid))).to(DEVICE).unsqueeze(dim=2)

    for img_name in image_list:
        print(img_name)
        org_img = cv2.imread(os.path.join(src, img_name))
        if org_img is None:
            # cv2.imread signals failure by returning None (non-image file,
            # sub-directory, unreadable path) rather than raising.
            print(" Could not read image, skipping")
            continue

        img, size, x, y = _letterbox(org_img)

        # Reverse the channel order (BGR -> RGB), normalise, add a batch axis.
        inputs = tran(img[:, :, ::-1]).unsqueeze(dim=0).to(DEVICE)

        with torch.no_grad():
            y_detection, _, _, _ = model(inputs)

        for index in range(y_detection.shape[0]):
            # The first five channels are read as left/top/right/bottom
            # distances (scaled by the input width/height) and a score.
            left, top, right, bottom, score = torch.split(
                y_detection[index, :, :, :5], 1, dim=-1
            )
            left = x_mask - left * inputs.shape[3]
            top = y_mask - top * inputs.shape[2]
            right = x_mask + right * inputs.shape[3]
            bottom = y_mask + bottom * inputs.shape[2]

            # One row per grid cell: [score, x1, y1, x2, y2].
            out = torch.flatten(
                torch.cat([score, left, top, right, bottom], dim=2),
                start_dim=0,
                end_dim=1,
            )
            out = out[out[:, 0] > 0.3]

            if len(out) == 0:
                print(" No plates detected")
                continue

            out = NMS(out, 0.3)

            # Contiguous copy of the box columns for the recognition stage.
            boxes_tensor = out[:, 1:5].contiguous()
            with torch.no_grad():
                plate_texts, confidences = model.recognize_plates(
                    inputs[index:index + 1], [boxes_tensor]
                )

            # Map the boxes from network coordinates back to the original
            # image: undo the resize, then remove the letterbox offset.
            # astype() copies, so ``out`` itself is left untouched.
            boxes_np = out[:, 1:5].detach().cpu().numpy().astype(np.float64)
            boxes_np[:, 0::2] = boxes_np[:, 0::2] * size / img_size[0] - x
            boxes_np[:, 1::2] = boxes_np[:, 1::2] * size / img_size[1] - y
            preb_lurd_list = list(boxes_np.astype('int32'))

            preb_pl_list = []
            preb_c = []
            for i, det_conf in enumerate(out[:, 0].detach().cpu().tolist()):
                if i < len(plate_texts):
                    preb_pl_list.append(plate_texts[i])
                    rec_conf = confidences[i] if i < len(confidences) else 0.0
                    # Combined confidence: detection score times recognition score.
                    preb_c.append(round(det_conf * rec_conf, 3))
                else:
                    # Recognition returned fewer results than detections.
                    preb_pl_list.append("???")
                    preb_c.append(0.0)

            _annotate_and_save(org_img, img_name, preb_lurd_list, preb_pl_list, preb_c)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: process every entry found in the 'image' directory.
    image_dir = 'image'
    file_names = os.listdir(image_dir)
    inference(image_dir, file_names)
|
|