File size: 5,273 Bytes
a0e95d4
aec9462
52b0e0c
8c48635
b424e54
1d550f3
 
72b3efa
 
 
3a43469
52b0e0c
84a96e9
 
0839a8a
1d550f3
 
 
 
84a96e9
 
 
 
3a43469
84a96e9
3a43469
84a96e9
 
e20642b
52b0e0c
c557d41
aec9462
3c9a517
 
 
 
 
3a43469
3c9a517
 
 
 
 
3a43469
 
 
 
3c9a517
 
 
 
3a43469
 
 
 
 
 
 
 
 
 
 
 
 
72b3efa
3a43469
72b3efa
84a96e9
72b3efa
089fa45
aec9462
3c9a517
3a43469
 
aec9462
3c9a517
 
aec9462
3a43469
 
aec9462
 
 
3c9a517
3a43469
 
aec9462
 
 
72b3efa
3a43469
10b8a7d
52b0e0c
3c9a517
52b0e0c
3a43469
aec9462
52b0e0c
8c48635
847a398
3a43469
 
 
 
84a96e9
3c9a517
aec9462
3c9a517
3a43469
 
aec9462
3c9a517
aec9462
 
3a43469
 
aec9462
 
3a43469
3c9a517
3a43469
3c9a517
3a43469
 
97b6396
eefc184
97b6396
3a43469
 
 
a0e95d4
52b0e0c
0839a8a
8c48635
a0e95d4
8c48635
 
0839a8a
 
3a43469
 
 
8c48635
0839a8a
 
3a43469
 
 
 
089fa45
52b0e0c
0839a8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# app.py
import re, json, tempfile
import cv2
import numpy as np
import gradio as gr
import gc
import psutil
from ultralytics import YOLO
from paddleocr import PaddleOCR

np.int = int  # For backward compatibility — np.int was removed in NumPy 1.24; older deps may still reference it

# Recognition alphabet for the plate OCR: digits, uppercase A-Z, and space.
CHAR_LIST = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ ")
# Plate-detection model, loaded once at import time and shared by all requests.
yolo = YOLO("models/best.pt")

def print_mem_usage(tag=""):
    """Print system-wide RAM usage to stdout, prefixed with *tag*."""
    stats = psutil.virtual_memory()
    used_mb = stats.used / 1024**2
    total_mb = stats.total / 1024**2
    print(f"[{tag}] RAM usage: {used_mb:.2f} MB / {total_mb:.2f} MB ({stats.percent}%)")

def normalize_ocr(recs):
    """Flatten a PaddleOCR recognition result into a ``(text, confidence)`` pair.

    Accepts both result layouts seen across PaddleOCR versions:
      * ``[(text, score), ...]``
      * ``[(box, (text, score)), ...]``
    Returns ``("", 0.0)`` when the result is empty or unrecognized.
    """
    if not recs:
        return "", 0.0
    entry = recs[0]
    if isinstance(entry, (list, tuple)) and len(entry) == 2:
        head, tail = entry
        if isinstance(head, str):
            # Plain (text, score) layout.
            return head, float(tail)
        if isinstance(tail, (list, tuple)):
            # (box, (text, score)) layout — unwrap the inner pair.
            return tail[0], float(tail[1])
    return "", 0.0

def format_plate(s: str) -> str:
    """Normalize raw OCR text into a spaced plate string.

    Strips everything but A-Z/0-9, then tries to split the remainder into
    2 digits / 1-3 letters / 2-4 digits.  Non-matching but non-empty text
    comes back prefixed with ``RAW: ``; empty input yields ``Unknown``.
    """
    cleaned = re.sub(r'[^A-Z0-9]', '', s.upper())
    if not cleaned:
        return "Unknown"
    match = re.match(r'^(\d{2})([A-Z]{1,3})(\d{2,4})$', cleaned)
    if match:
        return " ".join(match.groups())
    return f"RAW: {cleaned}"

def correct_perspective(image, box):
    """Crop the detected plate (plus a 5px margin) and warp it to 128x32.

    NOTE(review): the source points are simply the crop's own corners, so
    the perspective warp is effectively a plain resize — no real skew
    correction happens here.  Returns None when the clamped crop is empty.
    """
    x1, y1, x2, y2 = box
    height, width = image.shape[:2]
    pad = 5
    # Expand the box by the margin, clamped to the image bounds.
    x1, y1 = max(0, x1 - pad), max(0, y1 - pad)
    x2, y2 = min(width, x2 + pad), min(height, y2 + pad)
    plate = image[y1:y2, x1:x2]
    if plate.size == 0:
        return None
    corners = np.float32([
        [0, 0],
        [plate.shape[1], 0],
        [plate.shape[1], plate.shape[0]],
        [0, plate.shape[0]],
    ])
    # Target matches the OCR input shape (3,32,128) → width 128, height 32.
    target = np.float32([[0, 0], [128, 0], [128, 32], [0, 32]])
    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(plate, transform, (128, 32))

def create_ocr():
    """Build a PaddleOCR instance configured for single-line plate recognition.

    Detection is disabled (crops come from YOLO upstream); only the
    recognizer plus the angle classifier run.  A fresh instance is created
    per request — presumably to cap memory growth between runs; confirm
    before switching to a shared singleton.
    """
    ocr = PaddleOCR(
        det=False,
        rec=True,
        rec_model_dir="models/ocr_model",
        rec_image_shape="3,32,128",
        cls=True,
        use_angle_cls=True,
        use_space_char=True
    )
    # HACK: overrides PaddleOCR's charset with the plate alphabet via a
    # private attribute; may break across PaddleOCR versions.
    ocr.text_recognizer.character = CHAR_LIST
    return ocr

def run_image(img, conf=0.25):
    """Detect and read license plates in a single RGB image.

    Args:
        img: RGB image as a numpy array (Gradio ``type="numpy"`` format).
        conf: YOLO confidence threshold passed through to the detector.

    Returns:
        Tuple of (annotated RGB image, status string with detection count).
    """
    ocr = create_ocr()
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # OpenCV drawing works in BGR
    res = yolo(bgr, conf=conf)[0]
    out = bgr.copy()

    for box in res.boxes.xyxy.cpu().numpy().astype(int):
        warped = correct_perspective(out, box)
        if warped is None:
            continue
        try:
            recs = ocr.ocr(warped, det=False, cls=True)
        except Exception:
            # Best-effort per crop: a failed OCR must not abort the whole run.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit.)
            recs = []
        gc.collect()
        print_mem_usage("After OCR")
        txt, score = normalize_ocr(recs)
        plate = format_plate(txt)
        label = f"{plate} ({score:.2f})"
        x1, y1, x2, y2 = box
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(out, label, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return cv2.cvtColor(out, cv2.COLOR_BGR2RGB), f"{len(res.boxes)} plate(s) detected"

def run_video(video_file, conf=0.25):
    """Detect and read license plates frame-by-frame in a video file.

    Writes an annotated video to a temp file and a JSON log of recognized
    plates (time_s, plate, conf) to ``output.json`` in the CWD.

    Args:
        video_file: path to the input video.
        conf: YOLO confidence threshold passed through to the detector.

    Returns:
        Tuple of (annotated video path, status string).
    """
    ocr = create_ocr()
    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # some containers report 0 fps
    w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    records, idx = [], 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            idx += 1
            t = idx / fps  # timestamp of this frame in seconds
            res = yolo(frame, conf=conf)[0]

            for box in res.boxes.xyxy.cpu().numpy().astype(int):
                warped = correct_perspective(frame, box)
                if warped is None:
                    continue
                try:
                    recs = ocr.ocr(warped, det=False, cls=True)
                except Exception:
                    # Best-effort per crop; was a bare `except:`, which also
                    # swallowed KeyboardInterrupt / SystemExit.
                    recs = []
                gc.collect()
                print_mem_usage("After OCR")
                txt, score = normalize_ocr(recs)
                plate = format_plate(txt)
                raw_txt = plate[5:] if plate.startswith("RAW:") else plate
                if raw_txt != "Unknown":
                    records.append({"time_s": round(t, 2), "plate": raw_txt, "conf": round(score, 3)})
                x1, y1, x2, y2 = box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, plate, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            writer.write(frame)
    finally:
        # Release handles even if YOLO/OCR raises mid-stream, so the partial
        # output file is finalized and the capture device is freed.
        cap.release()
        writer.release()

    with open("output.json", "w") as f:
        json.dump(records, f, indent=2)
    return out_path, "Done"

# --- Gradio UI wiring -------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🚗 License Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            # Inputs: an image OR a video, plus the shared YOLO confidence
            # slider used by both run buttons.
            img_in = gr.Image(type="numpy", label="Upload Image")
            vid_in = gr.File(label="Upload Video (.mp4)")
            conf = gr.Slider(0.0, 1.0, value=0.25, step=0.01, label="YOLO Confidence")
            btn_i = gr.Button("Run Image")
            btn_v = gr.Button("Run Video")
        with gr.Column():
            # Outputs: annotated media and a free-text status line.
            img_out = gr.Image(type="numpy", label="Annotated Image")
            vid_out = gr.Video(label="Annotated Video")
            status = gr.Textbox(label="Status / JSON Path")

    btn_i.click(run_image, [img_in, conf], [img_out, status])
    btn_v.click(run_video, [vid_in, conf], [vid_out, status])

if __name__ == "__main__":
    # Launch the Gradio server when run as a script (not on import).
    demo.launch()