APIMONSTER committed
Commit 72b3efa · verified · 1 Parent(s): 5a658a9
Update app.py
Files changed (1):
  app.py +69 -153
app.py CHANGED
@@ -1,171 +1,87 @@
  # app.py
- import cv2
- import json
- import tempfile
+ import cv2, json, tempfile, re
  import numpy as np
- import re
- import paddle
- import paddle.nn as nn
- from ultralytics import YOLO
  import gradio as gr
- from datetime import datetime
- from pathlib import Path
-
- # ─── 0) PlateOCR model definition ────────────────────────────────
- MAX_SEQ_LEN = 15
- LABEL_MAP = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ "
- label_to_int = {c: i for i,c in enumerate(LABEL_MAP)}
- int_to_label = {i: c for c,i in label_to_int.items()}
-
- class OCRHead(nn.Layer):
-     def __init__(self, in_feats):
-         super().__init__()
-         self.fc = nn.Linear(in_feats, MAX_SEQ_LEN * len(LABEL_MAP))
-     def forward(self, x):
-         B = x.shape[0]
-         logits = self.fc(x).reshape([B, MAX_SEQ_LEN, -1])
-         return logits
-
- class PlateOCRTransfer(nn.Layer):
-     def __init__(self):
-         super().__init__()
-         # same backbone as your training script
-         self.backbone = nn.Sequential(
-             nn.Conv2D(3,32,3,padding=1), nn.BatchNorm2D(32), nn.ReLU(), nn.MaxPool2D(2),
-             nn.Conv2D(32,64,3,padding=1), nn.BatchNorm2D(64), nn.ReLU(), nn.MaxPool2D(2),
-             nn.Conv2D(64,128,3,padding=1), nn.BatchNorm2D(128), nn.ReLU(), nn.MaxPool2D(2),
-             nn.Dropout(0.25)
-         )
-         # determine flattened feature size
-         dummy = paddle.randn([1,3,32,128])
-         flat = paddle.flatten(self.backbone(dummy),1).shape[1]
-         self.head = OCRHead(flat)
-
-     def forward(self, x):
-         x = self.backbone(x)
-         x = paddle.flatten(x,1)
-         return self.head(x)
-
- # ─── 1) Greedy decode ─────────────────────────────────────────────
- def greedy_decode(logits):
-     # logits: [1, T, C]
-     pred = logits.argmax(axis=2).numpy()[0]  # [T]
-     res = []
-     prev = -1
-     for idx in pred:
-         if idx != prev and idx < len(LABEL_MAP):
-             res.append(LABEL_MAP[idx])
-         prev = idx
-     return "".join(res).strip()
-
- # ─── 2) Load detection & OCR models ───────────────────────────────
- yolo_model = YOLO("models/best.pt")
-
- ocr_model = PlateOCRTransfer()
- checkpoint = paddle.load("models/best_plate_model.pdparams")
- ocr_model.set_state_dict(checkpoint)
- ocr_model.eval()
-
- # ─── 3) Plate formatting helper ───────────────────────────────────
- def format_turkish_plate(plate: str) -> str:
-     m = re.match(r"^(\d{2})([A-Z]{1,3})(\d{2,4})$", plate.replace(" ", ""))
-     if m:
-         return f"{m.group(1)} {m.group(2)} {m.group(3)}"
-     return "Unknown"
-
- # ─── 4) Single-image pipeline ────────────────────────────────────
- def process_image(img_np, conf_thresh=0.25):
-     img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
-     res = yolo_model(img_bgr, iou=0.3, conf=conf_thresh)[0]
-     boxes = res.boxes.xyxy.cpu().numpy()
-     scores = res.boxes.conf.cpu().numpy()
-
-     annotated = img_bgr.copy()
-     count = 0
-     for (x1,y1,x2,y2), conf in zip(boxes, scores):
-         x1,y1,x2,y2 = map(int,(x1,y1,x2,y2))
-         crop = annotated[y1:y2, x1:x2]
-         if crop.size == 0:
-             continue
-
-         # preprocess for PlateOCR
-         plate = cv2.resize(crop, (128,32)).astype("float32") / 255.0
-         inp = paddle.to_tensor(plate.transpose(2,0,1)[None,:,:,:])
-         with paddle.no_grad():
-             logits = ocr_model(inp)  # [1,T,C]
-         txt = greedy_decode(logits)
-         fmtd = format_turkish_plate(txt)
-
-         label = f"{fmtd} ({conf:.2f})"
-         cv2.rectangle(annotated,(x1,y1),(x2,y2),(0,255,0),2)
-         cv2.putText(annotated, label, (x1, y1-6),
-                     cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)
-         count += 1
-
-     out = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
-     return out, f"{count} plate(s) detected"
-
- # ─── 5) Video pipeline ───────────────────────────────────────────
- def process_video(video_file, conf_thresh=0.25):
+ from ultralytics import YOLO
+ from paddleocr import PaddleOCR
+
+ # 1) load detection + OCR
+ yolo = YOLO("models/best.pt")
+ ocr = PaddleOCR(
+     det_model_dir=None,                # turn off internal detector
+     rec_model_dir="models/ocr_model",  # inference export dir
+     use_textline_orientation=True      # orientation head
+ )
+
+ # 2) helper to enforce "DD AAA NNNN" style
+ def fmt_plate(s):
+     m = re.match(r"^(\d{2})([A-Z]{1,3})(\d{2,4})$", s.replace(" ",""))
+     return f"{m[1]} {m[2]} {m[3]}" if m else "Unknown"
+
+ # 3) image pipeline
+ def run_image(img, conf=0.25):
+     bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+     res = yolo(bgr, conf=conf)[0]
+     out = bgr.copy()
+     for box,score in zip(res.boxes.xyxy.cpu().numpy(), res.boxes.conf.cpu().numpy()):
+         x1,y1,x2,y2 = map(int,box)
+         crop = out[y1:y2, x1:x2]
+         if crop.size==0: continue
+         plate_img = cv2.resize(crop,(128,32))
+         rec = ocr.ocr(plate_img, cls=True)[0]
+         txt = "".join(seg[1][0] for seg in rec)
+         label = fmt_plate(txt)
+         cv2.rectangle(out,(x1,y1),(x2,y2),(0,255,0),2)
+         cv2.putText(out, f"{label} {score:.2f}", (x1,y1-5),
+                     cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,0),2)
+     return cv2.cvtColor(out,cv2.COLOR_BGR2RGB), f"{len(res.boxes)} plates detected"
+
+ # 4) video pipeline (frame-by-frame, writes output.json)
+ def run_video(video_file, conf=0.25):
      cap = cv2.VideoCapture(video_file)
      fps = cap.get(cv2.CAP_PROP_FPS)
-     w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-     h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
-     tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-     writer = cv2.VideoWriter(tmp_out, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w,h))
-
-     logs = []
-     frame_i = 0
+     w,h = int(cap.get(3)), int(cap.get(4))
+     out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+     writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w,h))
+     records = []
+     idx = 0
      while True:
-         ret, frame = cap.read()
+         ret,frame = cap.read()
          if not ret: break
-         frame_i += 1
-         t = frame_i / fps
-
-         res = yolo_model(frame, iou=0.3, conf=conf_thresh)[0]
-         boxes = res.boxes.xyxy.cpu().numpy()
-
-         for (x1,y1,x2,y2) in boxes:
-             x1,y1,x2,y2 = map(int,(x1,y1,x2,y2))
+         idx+=1; t=idx/fps
+         res = yolo(frame, conf=conf)[0]
+         for (x1,y1,x2,y2) in res.boxes.xyxy.cpu().numpy().astype(int):
              crop = frame[y1:y2, x1:x2]
              if crop.size==0: continue
-
-             plate = cv2.resize(crop,(128,32)).astype("float32")/255.0
-             inp = paddle.to_tensor(plate.transpose(2,0,1)[None,:,:,:])
-             with paddle.no_grad():
-                 logits = ocr_model(inp)
-             txt = greedy_decode(logits)
-             fmtd = format_turkish_plate(txt)
-             if fmtd!="Unknown":
-                 logs.append({"time_s":round(t,2),"plate":fmtd})
-             cv2.rectangle(frame,(x1,y1),(x2,y2),(0,255,0),2)
-             cv2.putText(frame, fmtd, (x1,y1-6),
-                         cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,0),2)
-
+             plate = cv2.resize(crop,(128,32))
+             rec = ocr.ocr(plate, cls=True)[0]
+             txt = "".join(seg[1][0] for seg in rec)
+             label = fmt_plate(txt)
+             score = min(seg[1][1] for seg in rec) if rec else 0.0
+             if label!="Unknown":
+                 records.append({"time_s":round(t,2),"plate":label,"conf":round(score,3)})
+             cv2.rectangle(frame,(x1,y1),(x2,y2),(0,255,0),2)
+             cv2.putText(frame,label,(x1,y1-5),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,0),2)
          writer.write(frame)
-
      cap.release(); writer.release()
-     with open("output.json","w") as f:
-         json.dump(logs,f,indent=2)
-     return tmp_out
+     with open("output.json","w") as f: json.dump(records,f,indent=2)
+     return out_path
 
- # ─── 6) Gradio UI ───────────────────────────────────────────────
+ # 5) Gradio UI
  with gr.Blocks() as demo:
-     gr.Markdown("## 🚗 License Plate Detection & OCR")
+     gr.Markdown("## 🚗 Plate Detection + Recognition")
      with gr.Row():
          with gr.Column():
-             inp_img = gr.Image(type="numpy", label="Upload Image")
-             inp_vid = gr.File(label="Upload Video (.mp4)")
-             conf = gr.Slider(0,1,0.25,0.01, label="YOLO Confidence")
-             b1 = gr.Button("Run Image")
-             b2 = gr.Button("Run Video")
+             img_in = gr.Image(type="numpy", label="Image")
+             vid_in = gr.File(label="Video (.mp4)")
+             conf = gr.Slider(0,1,0.25,0.01, label="YOLO confidence")
+             b1 = gr.Button("Process Image")
+             b2 = gr.Button("Process Video")
          with gr.Column():
-             out_img = gr.Image(type="numpy", label="Annotated Image")
-             out_vid = gr.Video(label="Annotated Video")
-             out_txt = gr.Textbox(label="Status / JSON Path")
-     b1.click(process_image, [inp_img, conf], [out_img, out_txt])
-     b2.click(process_video, [inp_vid, conf], [out_vid, out_txt])
-
+             img_out = gr.Image(type="numpy", label="Result")
+             vid_out = gr.Video(label="Annotated Video")
+             txt_out = gr.Textbox(label="Status / JSON path")
+     b1.click(run_image, [img_in,conf],[img_out,txt_out])
+     b2.click(run_video, [vid_in,conf],[vid_out,txt_out])
  if __name__=="__main__":
      demo.launch()
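
One caveat in the new recognition calls: `ocr.ocr(plate_img, cls=True)[0]` assumes something was read. Depending on the PaddleOCR version, the per-image result can be None or an empty list when the recognizer finds no text, in which case `"".join(seg[1][0] for seg in rec)` raises. Also, in the PaddleOCR releases I am aware of, `det_model_dir=None` means "use the default detection model" rather than "turn off the internal detector"; skipping detection on an already-cropped plate is usually done per call with `det=False`. A minimal defensive sketch (the helper name `read_plate` is hypothetical, and it assumes the classic `[box, (text, score)]` segment layout):

    import cv2

    def read_plate(ocr, crop):
        # Run recognition on a plate crop; assumes PaddleOCR's classic
        # [box, (text, score)] result layout (version-dependent assumption).
        result = ocr.ocr(cv2.resize(crop, (128, 32)), cls=True)
        if not result or result[0] is None:
            return "", 0.0                          # nothing readable in this crop
        segs = result[0]
        text = "".join(seg[1][0] for seg in segs)
        score = min(seg[1][1] for seg in segs)      # most pessimistic segment
        return text, score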
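Separately, both the old `process_video` and the new `run_video` return a single value while their buttons are wired to two outputs (`[out_vid, out_txt]` before, `[vid_out, txt_out]` after), so depending on the Gradio version the video run either errors out or leaves the status textbox empty. A small sketch of a fix, keeping the existing wiring and returning a status string alongside the path (the wording of the status message is my own):

    # revised tail of run_video: return both values b2.click expects
    with open("output.json", "w") as f:
        json.dump(records, f, indent=2)
    return out_path, f"{len(records)} detection(s) logged to output.json"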
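Finally, two small robustness points in `run_video`'s setup: `cap.get(3)` and `cap.get(4)` are the numeric aliases of `cv2.CAP_PROP_FRAME_WIDTH` and `cv2.CAP_PROP_FRAME_HEIGHT`, which read better spelled out, and `cap.get(cv2.CAP_PROP_FPS)` can return 0 for some containers, which would make `t = idx/fps` divide by zero. A guarded version of the setup (the 25.0 fallback is an arbitrary nominal rate):

    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0       # some files report 0 fps; fall back
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))    # named constant for cap.get(3)
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))   # named constant for cap.get(4)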