|
|
|
|
|
import re, json, tempfile |
|
|
import cv2 |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
import gc |
|
|
import psutil |
|
|
from ultralytics import YOLO |
|
|
from paddleocr import PaddleOCR |
|
|
|
|
|
# Compatibility shim: np.int was removed in NumPy 1.24, but some older
# dependency code paths may still reference it — TODO confirm still needed.
np.int = int


# Character set the OCR recognizer is restricted to: digits, uppercase
# Latin letters, and space (assigned to the recognizer in create_ocr()).
CHAR_LIST = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ ")

# License-plate detector, loaded once at import time and shared by
# run_image()/run_video(). NOTE(review): assumes models/best.pt exists.
yolo = YOLO("models/best.pt")
|
|
|
|
|
def print_mem_usage(tag=""):
    """Print current system RAM usage to stdout, prefixed with *tag*."""
    stats = psutil.virtual_memory()
    used_mb = stats.used / 1024 ** 2
    total_mb = stats.total / 1024 ** 2
    print(f"[{tag}] RAM usage: {used_mb:.2f} MB / {total_mb:.2f} MB ({stats.percent}%)")
|
|
|
|
|
def normalize_ocr(recs):
    """Collapse varying PaddleOCR result layouts into a (text, score) pair.

    Handles the flat layout ``[(text, score), ...]`` and the nested layout
    ``[(box, (text, score)), ...]``; only the first entry is inspected.
    Anything unrecognized (including an empty result) yields ("", 0.0).
    """
    if not recs:
        return "", 0.0
    entry = recs[0]
    if isinstance(entry, (list, tuple)) and len(entry) == 2:
        head, tail = entry
        # Flat form: (text, score)
        if isinstance(head, str):
            return head, float(tail)
        # Nested form: (box, (text, score))
        if isinstance(tail, (list, tuple)):
            return tail[0], float(tail[1])
    return "", 0.0
|
|
|
|
|
def format_plate(s: str) -> str:
    """Normalize OCR text into a spaced plate string.

    A string matching 2 digits + 1-3 letters + 2-4 digits becomes
    "NN LLL NNNN"; other non-empty text is returned as "RAW: <text>";
    empty input yields "Unknown".
    """
    cleaned = re.sub(r'[^A-Z0-9]', '', s.upper())
    match = re.match(r'^(\d{2})([A-Z]{1,3})(\d{2,4})$', cleaned)
    if match:
        return " ".join(match.groups())
    if cleaned:
        return f"RAW: {cleaned}"
    return "Unknown"
|
|
|
|
|
def correct_perspective(image, box):
    """Crop *box* (with a small pixel margin) and warp it to 128x32 for OCR.

    NOTE(review): the source quad is the axis-aligned crop rectangle, so
    this "perspective" warp is effectively a resize — kept as-is to
    preserve behavior.

    Returns the 128x32 patch, or None when the clamped box is empty.
    """
    x1, y1, x2, y2 = box
    img_h, img_w = image.shape[:2]
    pad = 5
    # Expand the detector box by the margin, clamped to image bounds.
    x1, y1 = max(0, x1 - pad), max(0, y1 - pad)
    x2, y2 = min(img_w, x2 + pad), min(img_h, y2 + pad)
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None
    crop_h, crop_w = crop.shape[:2]
    corners = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])
    target = np.float32([[0, 0], [128, 0], [128, 32], [0, 32]])
    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(crop, transform, (128, 32))
|
|
|
|
|
def create_ocr():
    """Build a recognition-only PaddleOCR instance limited to CHAR_LIST.

    Detection is disabled (YOLO supplies the boxes); the angle classifier
    is enabled so rotated crops can be handled.
    """
    options = dict(
        det=False,
        rec=True,
        rec_model_dir="models/ocr_model",
        rec_image_shape="3,32,128",
        cls=True,
        use_angle_cls=True,
        use_space_char=True,
    )
    reader = PaddleOCR(**options)
    # Restrict decoding to the plate alphabet.
    # NOTE(review): reaches into PaddleOCR internals — version-sensitive.
    reader.text_recognizer.character = CHAR_LIST
    return reader
|
|
|
|
|
def run_image(img, conf=0.25):
    """Detect license plates in an RGB image and draw annotated boxes.

    Args:
        img: RGB image array (as supplied by the Gradio Image component).
        conf: YOLO confidence threshold for plate detection.

    Returns:
        Tuple of (annotated RGB image, status string with detection count).
    """
    ocr = create_ocr()
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    res = yolo(bgr, conf=conf)[0]
    out = bgr.copy()

    for box in res.boxes.xyxy.cpu().numpy().astype(int):
        warped = correct_perspective(out, box)
        if warped is None:
            continue
        try:
            recs = ocr.ocr(warped, det=False, cls=True)
        except Exception as e:
            # OCR can fail on degenerate crops; log and continue with an
            # empty result. (Was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            print(f"OCR failed on crop: {e}")
            recs = []
        gc.collect()
        print_mem_usage("After OCR")
        txt, score = normalize_ocr(recs)
        plate = format_plate(txt)
        label = f"{plate} ({score:.2f})"
        x1, y1, x2, y2 = box
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Label sits just above the box; may clip at the top image edge.
        cv2.putText(out, label, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return cv2.cvtColor(out, cv2.COLOR_BGR2RGB), f"{len(res.boxes)} plate(s) detected"
|
|
|
|
|
def run_video(video_file, conf=0.25):
    """Detect and recognize plates on every frame of a video.

    Writes an annotated .mp4 to a temp file and dumps all timestamped
    plate reads to output.json in the current working directory.

    Args:
        video_file: path to the uploaded video file.
        conf: YOLO confidence threshold for plate detection.

    Returns:
        Tuple of (path to annotated video, status string).
    """
    ocr = create_ocr()
    cap = cv2.VideoCapture(video_file)
    # CAP_PROP_FPS can be 0 for some containers; fall back to 30.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30
    w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    records, idx = [], 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            idx += 1
            t = idx / fps
            res = yolo(frame, conf=conf)[0]

            for box in res.boxes.xyxy.cpu().numpy().astype(int):
                warped = correct_perspective(frame, box)
                if warped is None:
                    continue
                try:
                    recs = ocr.ocr(warped, det=False, cls=True)
                except Exception as e:
                    # A single bad crop shouldn't abort the whole video.
                    # (Was a bare `except:`, which also caught
                    # KeyboardInterrupt/SystemExit.)
                    print(f"OCR failed on frame {idx}: {e}")
                    recs = []
                gc.collect()
                print_mem_usage("After OCR")
                txt, score = normalize_ocr(recs)
                plate = format_plate(txt)
                # Strip the "RAW: " prefix so the JSON holds bare text.
                raw_txt = plate[5:] if plate.startswith("RAW:") else plate
                if raw_txt != "Unknown":
                    records.append({"time_s": round(t, 2), "plate": raw_txt, "conf": round(score, 3)})
                x1, y1, x2, y2 = box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, plate, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            writer.write(frame)
    finally:
        # Release capture/writer even if detection raises mid-stream, so
        # the partially-written output file is finalized.
        cap.release()
        writer.release()

    with open("output.json", "w") as f:
        json.dump(records, f, indent=2)
    # Status names the JSON path, matching the "Status / JSON Path" textbox.
    return out_path, "Done — detections saved to output.json"
|
|
|
|
|
# Gradio UI: inputs in the left column, annotated outputs in the right.
with gr.Blocks() as demo:
    gr.Markdown("## 🚗 License Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            # Inputs: an image or a video file, plus a shared detector
            # confidence threshold used by both pipelines.
            img_in = gr.Image(type="numpy", label="Upload Image")
            vid_in = gr.File(label="Upload Video (.mp4)")
            conf = gr.Slider(0.0, 1.0, value=0.25, step=0.01, label="YOLO Confidence")
            btn_i = gr.Button("Run Image")
            btn_v = gr.Button("Run Video")
        with gr.Column():
            # Outputs: annotated media and a status line.
            img_out = gr.Image(type="numpy", label="Annotated Image")
            vid_out = gr.Video(label="Annotated Video")
            status = gr.Textbox(label="Status / JSON Path")

    # Wire each button to its processing function.
    btn_i.click(run_image, [img_in, conf], [img_out, status])
    btn_v.click(run_video, [vid_in, conf], [vid_out, status])
|
|
|
|
|
if __name__ == "__main__":
    # Launch the Gradio server only when run as a script (not on import).
    demo.launch()
|
|
|