Spaces:
Running
Running
| # app.py | |
| import re, json, tempfile | |
| import cv2 | |
| import numpy as np | |
| import gradio as gr | |
| import gc | |
| import psutil | |
| from ultralytics import YOLO | |
| from paddleocr import PaddleOCR | |
# NumPy >= 1.20 deprecated (and 1.24 removed) the `np.int` alias; restore it
# for older dependencies (e.g. PaddleOCR internals) that still reference it.
np.int = int # For backward compatibility

# Character set the OCR recognizer is restricted to: digits, uppercase
# letters, and space — the license-plate alphabet.
CHAR_LIST = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ ")

# Plate detector loaded once at import time and shared by both the
# image and video pipelines below.
yolo = YOLO("models/best.pt")
def print_mem_usage(tag=""):
    """Print current system RAM usage to stdout, prefixed with *tag*."""
    stats = psutil.virtual_memory()
    used_mb = stats.used / 1024**2
    total_mb = stats.total / 1024**2
    print(f"[{tag}] RAM usage: {used_mb:.2f} MB / {total_mb:.2f} MB ({stats.percent}%)")
def normalize_ocr(recs):
    """Extract a ``(text, confidence)`` pair from a PaddleOCR rec-only result.

    Handles both result shapes seen across PaddleOCR versions: a flat
    ``(text, score)`` pair, or a ``(box, (text, score))`` pair.
    Returns ``("", 0.0)`` when nothing was recognized or the shape is
    unfamiliar.
    """
    if not recs:
        return "", 0.0
    entry = recs[0]
    if isinstance(entry, (list, tuple)) and len(entry) == 2:
        head, tail = entry[0], entry[1]
        # Shape 1: ("TEXT", score)
        if isinstance(head, str):
            return head, float(tail)
        # Shape 2: (box, ("TEXT", score))
        if isinstance(tail, (list, tuple)):
            return tail[0], float(tail[1])
    return "", 0.0
def format_plate(s: str) -> str:
    """Normalize raw OCR text into a spaced plate string.

    Strips everything except uppercase letters and digits, then matches
    the expected plate pattern: 2 digits, 1-3 letters, 2-4 digits.
    Returns the groups space-separated on a match, ``"RAW: <cleaned>"``
    on a mismatch, and ``"Unknown"`` when nothing survives cleaning.
    """
    cleaned = re.sub(r'[^A-Z0-9]', '', s.upper())
    if not cleaned:
        return "Unknown"
    match = re.match(r'^(\d{2})([A-Z]{1,3})(\d{2,4})$', cleaned)
    if match:
        return " ".join(match.groups())
    return f"RAW: {cleaned}"
def correct_perspective(image, box):
    """Crop *box* (with a 5px margin) from *image* and warp it to 128x32.

    The four source points are the crop's own corners, so the perspective
    warp reduces to a resize into the OCR model's expected input size.
    Returns None when the clamped crop is empty.
    """
    left, top, right, bottom = box
    img_h, img_w = image.shape[:2]
    pad = 5
    left = max(0, left - pad)
    top = max(0, top - pad)
    right = min(img_w, right + pad)
    bottom = min(img_h, bottom + pad)
    roi = image[top:bottom, left:right]
    if roi.size == 0:
        return None
    roi_h, roi_w = roi.shape[:2]
    corners = np.float32([[0, 0], [roi_w, 0], [roi_w, roi_h], [0, roi_h]])
    target = np.float32([[0, 0], [128, 0], [128, 32], [0, 32]])
    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(roi, transform, (128, 32))
def create_ocr():
    """Build a recognition-only PaddleOCR engine for plate crops.

    Detection is disabled (crops come from YOLO); angle classification
    stays on so rotated crops still read. The recognizer's charset is
    overridden with the plate alphabet (CHAR_LIST).
    """
    config = dict(
        det=False,
        rec=True,
        rec_model_dir="models/ocr_model",
        rec_image_shape="3,32,128",
        cls=True,
        use_angle_cls=True,
        use_space_char=True,
    )
    engine = PaddleOCR(**config)
    # NOTE(review): pokes a private attribute of PaddleOCR — fragile
    # across paddleocr versions; verify when upgrading the dependency.
    engine.text_recognizer.character = CHAR_LIST
    return engine
def run_image(img, conf=0.25):
    """Detect and read license plates in a single RGB image.

    Parameters:
        img: RGB numpy image from Gradio, or None when nothing was uploaded.
        conf: YOLO confidence threshold.

    Returns:
        (annotated RGB image or None, status string).
    """
    # Gradio passes None when the user clicks Run with no image.
    if img is None:
        return None, "No image provided"
    ocr = create_ocr()
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    res = yolo(bgr, conf=conf)[0]
    out = bgr.copy()
    for box in res.boxes.xyxy.cpu().numpy().astype(int):
        # Crop from the clean frame `bgr`, not the annotated `out`:
        # previously, green boxes/labels drawn for earlier detections
        # could bleed into later OCR crops and corrupt recognition.
        warped = correct_perspective(bgr, box)
        if warped is None:
            continue
        try:
            recs = ocr.ocr(warped, det=False, cls=True)
        except Exception:
            # Best-effort: a failed OCR call only blanks this plate.
            recs = []
        gc.collect()
        print_mem_usage("After OCR")
        txt, score = normalize_ocr(recs)
        plate = format_plate(txt)
        label = f"{plate} ({score:.2f})"
        x1, y1, x2, y2 = box
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(out, label, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return cv2.cvtColor(out, cv2.COLOR_BGR2RGB), f"{len(res.boxes)} plate(s) detected"
def run_video(video_file, conf=0.25):
    """Detect and read plates frame-by-frame in an uploaded video.

    Writes an annotated .mp4 to a temp file and dumps the recognized
    plates (time, text, confidence) to output.json.

    Parameters:
        video_file: a path string, or a Gradio file object with a .name attr.
        conf: YOLO confidence threshold.

    Returns:
        (annotated video path or None, status string).
    """
    if video_file is None:
        return None, "No video provided"
    # gr.File may hand us a wrapper object; use its .name when present,
    # otherwise assume it is already a plain path string.
    path = getattr(video_file, "name", video_file)
    ocr = create_ocr()
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        return None, "Could not open video"
    fps = cap.get(cv2.CAP_PROP_FPS) or 30
    w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    records, idx = [], 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            idx += 1
            t = idx / fps
            res = yolo(frame, conf=conf)[0]
            # Keep an unannotated copy so boxes drawn for earlier
            # detections in this frame cannot bleed into later OCR crops.
            clean = frame.copy()
            for box in res.boxes.xyxy.cpu().numpy().astype(int):
                warped = correct_perspective(clean, box)
                if warped is None:
                    continue
                try:
                    recs = ocr.ocr(warped, det=False, cls=True)
                except Exception:
                    # Best-effort: a failed OCR call only skips this plate.
                    recs = []
                gc.collect()
                print_mem_usage("After OCR")
                txt, score = normalize_ocr(recs)
                plate = format_plate(txt)
                raw_txt = plate[5:] if plate.startswith("RAW:") else plate
                if raw_txt != "Unknown":
                    records.append({"time_s": round(t, 2), "plate": raw_txt, "conf": round(score, 3)})
                x1, y1, x2, y2 = box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, plate, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            writer.write(frame)
    finally:
        # Always release codec handles, even if YOLO/OCR raises mid-video.
        cap.release()
        writer.release()
    with open("output.json", "w") as f:
        json.dump(records, f, indent=2)
    # The status textbox is labeled "Status / JSON Path" — include the path.
    return out_path, "Done — detections saved to output.json"
# --- Gradio UI -------------------------------------------------------------
# Left column: inputs (image upload, video upload, confidence slider, two
# run buttons). Right column: annotated outputs plus a status textbox.
# Component creation order inside the `with` blocks determines layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🚗 License Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            img_in = gr.Image(type="numpy", label="Upload Image")
            vid_in = gr.File(label="Upload Video (.mp4)")
            conf = gr.Slider(0.0, 1.0, value=0.25, step=0.01, label="YOLO Confidence")
            btn_i = gr.Button("Run Image")
            btn_v = gr.Button("Run Video")
        with gr.Column():
            img_out = gr.Image(type="numpy", label="Annotated Image")
            vid_out = gr.Video(label="Annotated Video")
            status = gr.Textbox(label="Status / JSON Path")
    # Wire each button to its pipeline; both share the confidence slider.
    btn_i.click(run_image, [img_in, conf], [img_out, status])
    btn_v.click(run_video, [vid_in, conf], [vid_out, status])

if __name__ == "__main__":
    demo.launch()