Spaces:
Running
Running
File size: 5,273 Bytes
a0e95d4 aec9462 52b0e0c 8c48635 b424e54 1d550f3 72b3efa 3a43469 52b0e0c 84a96e9 0839a8a 1d550f3 84a96e9 3a43469 84a96e9 3a43469 84a96e9 e20642b 52b0e0c c557d41 aec9462 3c9a517 3a43469 3c9a517 3a43469 3c9a517 3a43469 72b3efa 3a43469 72b3efa 84a96e9 72b3efa 089fa45 aec9462 3c9a517 3a43469 aec9462 3c9a517 aec9462 3a43469 aec9462 3c9a517 3a43469 aec9462 72b3efa 3a43469 10b8a7d 52b0e0c 3c9a517 52b0e0c 3a43469 aec9462 52b0e0c 8c48635 847a398 3a43469 84a96e9 3c9a517 aec9462 3c9a517 3a43469 aec9462 3c9a517 aec9462 3a43469 aec9462 3a43469 3c9a517 3a43469 3c9a517 3a43469 97b6396 eefc184 97b6396 3a43469 a0e95d4 52b0e0c 0839a8a 8c48635 a0e95d4 8c48635 0839a8a 3a43469 8c48635 0839a8a 3a43469 089fa45 52b0e0c 0839a8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# app.py
import re, json, tempfile
import cv2
import numpy as np
import gradio as gr
import gc
import psutil
from ultralytics import YOLO
from paddleocr import PaddleOCR
# NumPy 1.24 removed the deprecated `np.int` alias; re-create it here,
# presumably because one of the imported libraries still references it
# (TODO confirm which one and drop this shim once it is fixed upstream).
np.int = int
# Character table assigned to the recognizer in create_ocr(): the ten digits,
# the uppercase Latin letters, and a trailing space (37 entries in total).
CHAR_LIST = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ ")
# The YOLO model is loaded once at import time; run_image() and run_video()
# both call this shared instance.
yolo = YOLO("models/best.pt")
def print_mem_usage(tag=""):
    """Print a one-line snapshot of system RAM usage, prefixed with ``tag``.

    The figures come from ``psutil.virtual_memory()``; used and total memory
    are reported as bytes divided by 1024**2, followed by the percentage
    that psutil reports.
    """
    bytes_per_mb = 1024**2
    snapshot = psutil.virtual_memory()
    used_mb = snapshot.used / bytes_per_mb
    total_mb = snapshot.total / bytes_per_mb
    print(f"[{tag}] RAM usage: {used_mb:.2f} MB / {total_mb:.2f} MB ({snapshot.percent}%)")
def _as_text_score(entry):
    """Return ``(text, score)`` if ``entry`` is a single OCR line, else None.

    Two shapes are recognised:
    * ``(text, score)``
    * ``[box, (text, score)]``
    """
    if not (isinstance(entry, (list, tuple)) and len(entry) == 2):
        return None
    head, tail = entry
    if isinstance(head, str):
        return head, float(tail)
    if isinstance(tail, (list, tuple)) and len(tail) == 2 and isinstance(tail[0], str):
        return tail[0], float(tail[1])
    return None


def normalize_ocr(recs):
    """Extract the first ``(text, score)`` pair from an OCR result.

    The result layout differs depending on the OCR call mode and library
    version, so the first element of ``recs`` is inspected and, if it is not
    itself an OCR line, unwrapped through up to two further levels of
    nesting. Shapes handled:

    * ``[(text, score), ...]``
    * ``[[box, (text, score)], ...]``
    * ``[[(text, score), ...]]``            (one list per input image)
    * ``[[[box, (text, score)], ...]]``     (one list per input image)

    Args:
        recs: OCR result; may be ``None``, empty, or contain ``None``.

    Returns:
        ``(text, score)`` with ``text`` a ``str`` and ``score`` a ``float``;
        ``("", 0.0)`` when nothing recognisable is found.
    """
    if not recs:
        return "", 0.0
    entry = recs[0]
    # Try the entry as-is, then peel off per-image wrappers one at a time.
    for _ in range(3):
        pair = _as_text_score(entry)
        if pair is not None:
            return pair
        if isinstance(entry, (list, tuple)) and entry:
            entry = entry[0]
        else:
            break
    return "", 0.0
def format_plate(s: str) -> str:
    """Normalise raw OCR text into a display string for a plate.

    The text is upper-cased and every character outside ``A-Z0-9`` is
    dropped. If what remains is two digits, one to three letters, then two
    to four digits, the three groups are returned separated by single spaces.

    Args:
        s: Raw recognised text. ``None`` or any non-string value is treated
            as "nothing recognised" instead of raising.

    Returns:
        ``"DD LLL DDDD"``-style text on a pattern match, ``"RAW: <cleaned>"``
        when the cleaned text is non-empty but does not match, and
        ``"Unknown"`` when nothing usable remains.
    """
    if not isinstance(s, str):
        return "Unknown"
    cleaned = re.sub(r'[^A-Z0-9]', '', s.upper())
    if not cleaned:
        return "Unknown"
    m = re.match(r'^(\d{2})([A-Z]{1,3})(\d{2,4})$', cleaned)
    if m is None:
        return f"RAW: {cleaned}"
    return " ".join(m.groups())
def correct_perspective(image, box, *, margin=5, out_size=(128, 32)):
    """Crop a padded box out of ``image`` and rescale it to ``out_size``.

    NOTE: despite the name, no skew is estimated. The four source points are
    the corners of the axis-aligned crop, so the transform only stretches the
    crop to the target size.

    Args:
        image: Array indexed as ``image[y, x]``; only ``shape[:2]`` is read
            to obtain height and width.
        box: Four values ``(x1, y1, x2, y2)``; each is truncated to ``int``.
        margin: Pixels of padding added on every side before clamping to the
            image bounds.
        out_size: ``(width, height)`` of the returned image. The default
            matches the ``"3,32,128"`` shape configured in ``create_ocr``.

    Returns:
        The rescaled crop, or ``None`` when the clamped box is empty.
    """
    x1, y1, x2, y2 = (int(v) for v in box)
    h, w = image.shape[:2]
    x1 = max(0, x1 - margin)
    y1 = max(0, y1 - margin)
    # Clamp the far edge at 0 as well: a negative stop index would otherwise
    # be interpreted by slicing as "count from the end" and yield a bogus crop.
    x2 = max(0, min(w, x2 + margin))
    y2 = max(0, min(h, y2 + margin))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None
    crop_h, crop_w = crop.shape[:2]
    out_w, out_h = out_size
    src_pts = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])
    dst_pts = np.float32([[0, 0], [out_w, 0], [out_w, out_h], [0, out_h]])
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    return cv2.warpPerspective(crop, M, (out_w, out_h))
def create_ocr():
    """Build a fresh ``PaddleOCR`` instance and install the plate alphabet.

    The instance is constructed with detection disabled and recognition
    enabled, pointing at the model directory ``models/ocr_model`` with a
    ``3,32,128`` recognition image shape. After construction the
    recognizer's ``character`` attribute is replaced with ``CHAR_LIST``.

    Returns:
        The configured ``PaddleOCR`` object. A new one is created on every
        call; nothing is cached here.
    """
    settings = dict(
        det=False,
        rec=True,
        rec_model_dir="models/ocr_model",
        rec_image_shape="3,32,128",
        cls=True,
        use_angle_cls=True,
        use_space_char=True,
    )
    engine = PaddleOCR(**settings)
    # Swap in the restricted character table (digits, A-Z, space).
    engine.text_recognizer.character = CHAR_LIST
    return engine
def run_image(img, conf=0.25):
    """Detect plates in a single RGB image, OCR each one, and annotate it.

    Args:
        img: RGB image array as delivered by the Gradio image input, or
            ``None`` when the user pressed the button without uploading.
        conf: Confidence threshold forwarded to the YOLO detector.

    Returns:
        ``(annotated_rgb_image, status_text)``. When ``img`` is ``None`` the
        image slot is ``None`` and the status explains why.
    """
    if img is None:
        return None, "No image provided"
    ocr = create_ocr()
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    res = yolo(bgr, conf=conf)[0]
    out = bgr.copy()
    for box in res.boxes.xyxy.cpu().numpy().astype(int):
        # Crop from the untouched frame: cropping from `out` would feed the
        # rectangles and labels drawn for earlier boxes into the recognizer.
        warped = correct_perspective(bgr, box)
        if warped is None:
            continue
        try:
            recs = ocr.ocr(warped, det=False, cls=True)
        except Exception as exc:
            # Best effort: one failed crop must not abort the whole image,
            # but the failure should be visible in the logs.
            print(f"[run_image] OCR failed: {exc!r}")
            recs = []
        gc.collect()
        print_mem_usage("After OCR")
        txt, score = normalize_ocr(recs)
        plate = format_plate(txt)
        label = f"{plate} ({score:.2f})"
        # Plain ints keep OpenCV's point parsing happy regardless of the
        # NumPy integer type produced by astype().
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the label baseline inside the image for boxes near the top.
        label_y = max(y1 - 8, 15)
        cv2.putText(out, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return cv2.cvtColor(out, cv2.COLOR_BGR2RGB), f"{len(res.boxes)} plate(s) detected"
def run_video(video_file, conf=0.25):
    """Detect and OCR plates frame by frame, writing an annotated video.

    Every recognised plate is also appended to a list of
    ``{"time_s", "plate", "conf"}`` records, which is dumped to
    ``output.json`` in the current working directory once the video has been
    processed.

    Args:
        video_file: Path to the video, or an object exposing the path as
            ``.name`` (the Gradio file input may supply either, depending on
            the Gradio version), or ``None`` when nothing was uploaded.
        conf: Confidence threshold forwarded to the YOLO detector.

    Returns:
        ``(output_video_path, status_text)``. The path slot is ``None`` when
        no video was supplied or it could not be opened.
    """
    if video_file is None:
        return None, "No video provided"
    video_path = getattr(video_file, "name", video_file)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None, "Could not open video"

    writer = None
    records, idx = [], 0
    raw_prefix = "RAW: "
    try:
        ocr = create_ocr()
        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Only the name is needed; close the handle right away so it is not
        # leaked and the writer can reopen the path.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            out_path = tmp.name
        writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            idx += 1
            t = idx / fps
            res = yolo(frame, conf=conf)[0]
            # First pass: OCR every box on the clean frame. Drawing is
            # deferred so earlier annotations never end up in a later crop.
            annotations = []
            for box in res.boxes.xyxy.cpu().numpy().astype(int):
                warped = correct_perspective(frame, box)
                if warped is None:
                    continue
                try:
                    recs = ocr.ocr(warped, det=False, cls=True)
                except Exception as exc:
                    # Best effort: skip this crop but leave a trace.
                    print(f"[run_video] OCR failed on frame {idx}: {exc!r}")
                    recs = []
                gc.collect()
                print_mem_usage("After OCR")
                txt, score = normalize_ocr(recs)
                plate = format_plate(txt)
                # Records store the bare text without the "RAW: " marker.
                raw_txt = plate[len(raw_prefix):] if plate.startswith(raw_prefix) else plate
                if raw_txt != "Unknown":
                    records.append({"time_s": round(t, 2), "plate": raw_txt, "conf": round(score, 3)})
                annotations.append((tuple(int(v) for v in box), plate))
            # Second pass: draw all boxes and labels, then emit the frame.
            for (x1, y1, x2, y2), plate in annotations:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, plate, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            writer.write(frame)
    finally:
        # Release native handles even if detection or OCR raised.
        cap.release()
        if writer is not None:
            writer.release()
    with open("output.json", "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)
    return out_path, "Done"
# Gradio UI definition. One row holding two columns: the first collects the
# inputs and controls, the second shows the results.
# NOTE(review): the nesting below is reconstructed from how the context
# managers are used — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🚗 License Plate Detection + Recognition")
    with gr.Row():
        # Inputs: an image, a video file, the detector threshold, and one
        # button per processing mode.
        with gr.Column():
            img_in = gr.Image(type="numpy", label="Upload Image")
            vid_in = gr.File(label="Upload Video (.mp4)")
            conf = gr.Slider(0.0, 1.0, value=0.25, step=0.01, label="YOLO Confidence")
            btn_i = gr.Button("Run Image")
            btn_v = gr.Button("Run Video")
        # Outputs: annotated image, annotated video, and a status line.
        with gr.Column():
            img_out = gr.Image(type="numpy", label="Annotated Image")
            vid_out = gr.Video(label="Annotated Video")
            status = gr.Textbox(label="Status / JSON Path")
    # Each handler receives (input, slider value) and returns
    # (annotated output, status text), matching the lists given here.
    btn_i.click(run_image, [img_in, conf], [img_out, status])
    btn_v.click(run_video, [vid_in, conf], [vid_out, status])
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|