APIMONSTER committed on
Commit
8c48635
Β·
verified Β·
1 Parent(s): 17fbd1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -137
app.py CHANGED
@@ -1,137 +1,146 @@
1
- import os
2
- import cv2
3
- import numpy as np
4
- import paddle
5
- import paddle.nn as nn
6
- import gradio as gr
7
- from ultralytics import YOLO
8
- from PIL import Image
9
-
10
# ─── 1) PlateOCR configuration (exactly matching the fine-tuned model architecture) ───
MAX_SEQ_LEN = 15  # fixed number of character slots predicted per plate
LABEL_MAP = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ "  # index -> character; trailing space serves as padding
NUM_CLASSES = len(LABEL_MAP)  # output classes per character slot
14
-
15
class OCRHead(nn.Layer):
    """Classification head producing one logit vector per character slot.

    Maps the flattened CNN features to MAX_SEQ_LEN * NUM_CLASSES logits,
    reshaped to [batch, MAX_SEQ_LEN, NUM_CLASSES].
    """

    def __init__(self, flatten_size):
        super().__init__()
        self.dropout = nn.Dropout(0.5)  # regularization before the large FC layer
        self.fc = nn.Linear(flatten_size, MAX_SEQ_LEN * NUM_CLASSES)

    def forward(self, x):
        x = self.dropout(x)
        x = self.fc(x)
        # [B, MAX_SEQ_LEN * NUM_CLASSES] -> [B, MAX_SEQ_LEN, NUM_CLASSES]
        return x.reshape([-1, MAX_SEQ_LEN, NUM_CLASSES])
25
-
26
class PlateOCR(nn.Layer):
    """CNN backbone + OCRHead for fixed-length plate recognition.

    Expects [B, 3, 32, 128] input; three conv/BN/ReLU/pool stages downsample
    spatially, then the flattened features feed OCRHead.
    """

    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2D(3, 32, 3, padding=1), nn.BatchNorm2D(32), nn.ReLU(),
            nn.MaxPool2D(2, 2),
            nn.Conv2D(32, 64, 3, padding=1), nn.BatchNorm2D(64), nn.ReLU(),
            nn.MaxPool2D(2, 2),
            nn.Conv2D(64, 128, 3, padding=1), nn.BatchNorm2D(128), nn.ReLU(),
            nn.MaxPool2D(2, 2), nn.Dropout(0.25)
        )
        # Run a dummy tensor through the backbone once to compute the
        # flattened feature size for the head's Linear layer.
        dummy = paddle.randn([1, 3, 32, 128])
        flat_size = paddle.flatten(self.backbone(dummy), 1).shape[1]
        self.head = OCRHead(flat_size)

    def forward(self, x):
        x = self.backbone(x)
        x = paddle.flatten(x, 1)
        return self.head(x)
46
-
47
# ─── 2) Greedy Decode ───
def greedy_decode(logits):
    """Greedy-decode per-slot logits into plate strings.

    Args:
        logits: [B, T, C] scores — a paddle tensor (anything exposing
            ``.numpy()``) or a plain numpy array.

    Returns:
        list[str]: one decoded string per batch item, with padding
        spaces stripped.

    NOTE(review): collapsing consecutive identical indices is a
    CTC-style step; for this fixed-length per-slot head it also merges
    genuine doubled characters (e.g. "AA") — confirm against how the
    model was trained.
    """
    # Accept both paddle tensors and raw numpy arrays.
    arr = logits.numpy() if hasattr(logits, "numpy") else np.asarray(logits)
    preds = arr.argmax(axis=2)  # [B, T] best class index per slot
    texts = []
    for seq in preds:
        prev = -1
        chars = []
        for idx in seq:
            # argmax always yields 0..NUM_CLASSES-1, so the original
            # `idx < NUM_CLASSES` bound check was vacuous and is dropped;
            # only skip repeats of the previous index.
            if idx != prev:
                chars.append(LABEL_MAP[idx])
            prev = idx
        texts.append("".join(chars).strip())
    return texts
61
-
62
# ─── 3) Load models ───
# YOLO plate detector
yolo = YOLO("models/best.pt")

# Fine-tuned PlateOCR recognizer: architecture defined above, weights loaded
# from the checkpoint file.
plate_ocr = PlateOCR()
plate_ocr.set_state_dict(paddle.load("models/best_plate_model.pdparams"))
plate_ocr.eval()  # inference mode: disables dropout
71
# ─── 4) Pipeline function ───
def detect_and_read(image, conf_thresh):
    """Detect plates with YOLO, read each crop with PlateOCR, annotate.

    Args:
        image: RGB numpy image from Gradio, or None when nothing uploaded.
        conf_thresh: YOLO confidence threshold (0..1).

    Returns:
        (annotated RGB image or None, status string).
    """
    if image is None:
        return None, "❌ Upload an image."

    # 1) Gradio delivers RGB; OpenCV convention is BGR (original comment
    #    said "BGR→RGB" — the conversion is actually RGB→BGR).
    img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # 2) YOLO predict
    results = yolo.predict(source=img, conf=conf_thresh)[0]
    boxes = results.boxes.xyxy.cpu().numpy()  # [N,4] xyxy corner coords
    scores = results.boxes.conf.cpu().numpy()
    classes = results.boxes.cls.cpu().numpy()  # NOTE(review): unused below

    annotated = img.copy()
    ocr_texts = []

    for (x1, y1, x2, y2), conf in zip(boxes, scores):
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
        crop = annotated[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        # 3) Preprocess for PlateOCR: resize to the 128x32 training size,
        #    scale to [0,1], HWC -> CHW, add batch dim.
        plate = cv2.resize(crop, (128, 32))
        arr = plate.astype("float32") / 255.0
        arr = arr.transpose(2, 0, 1)[None, :, :, :]  # [1,3,32,128]
        inp = paddle.to_tensor(arr)

        # 4) OCR inference (no gradients needed)
        with paddle.no_grad():
            out = plate_ocr(inp)  # [1, T, C]
        text = greedy_decode(out)[0]

        # 5) Draw box + text + confidence
        label = f"{text} ({conf:.2f})"
        # bounding box
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # filled label background, placed above the box when there is room,
        # otherwise just below the top edge
        (tw, th), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
        ty = y1 - 10 if y1 - 10 > th + baseline else y1 + th + 10
        cv2.rectangle(annotated, (x1, ty - th - baseline), (x1 + tw, ty + baseline), (0, 255, 0), cv2.FILLED)
        cv2.putText(annotated, label, (x1, ty), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

        ocr_texts.append(text)

    # Convert back to RGB for Gradio display
    out_img = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    status = f"Detected {len(boxes)} plates, OCR: {ocr_texts}"
    return out_img, status
121
-
122
# ─── 5) Gradio UI ───
with gr.Blocks() as demo:
    gr.Markdown("## πŸš— Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            inp = gr.Image(type="numpy", label="Upload Image")
            conf = gr.Slider(0, 1, 0.25, 0.01, label="YOLO Confidence")  # min, max, default, step
            btn = gr.Button("Run")
        with gr.Column():
            out_img = gr.Image(type="numpy", label="Annotated")
            out_text = gr.Textbox(label="Status", interactive=False)
    # detect_and_read returns (image, status) matching the two outputs
    btn.click(detect_and_read, [inp, conf], [out_img, out_text])
    gr.Markdown("---\n**How it works:** YOLO finds plate boxes; your PaddleOCR-fine-tuned PlateOCR reads them.")

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import cv2
4
+ import json
5
+ import tempfile
6
+ import numpy as np
7
+ from ultralytics import YOLO
8
+ from paddleocr import PaddleOCR
9
+ import gradio as gr
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+
13
# ─── 1) Load models ───────────────────────────────────────────────
# YOLO plate detector
yolo_model = YOLO("models/best.pt")
# PaddleOCR recognizer
ocr_model = PaddleOCR(
    # NOTE(review): det_model_dir=None does not disable detection — PaddleOCR
    # falls back to its default detector; confirm the "recognition only"
    # intent actually holds at runtime.
    det_model_dir=None,
    # NOTE(review): PaddleOCR expects an exported inference-model *directory*
    # here, not a .pdparams training checkpoint — verify this path loads.
    rec_model_dir="models/best_plate_model.pdparams",
    use_angle_cls=True,   # classify/correct rotated text crops
    use_space_char=True   # allow spaces in recognized text
)
21
+
22
+ # ─── 2) Plate formatting helper ────────────────────────────────────
23
+ import re
24
def format_turkish_plate(plate: str) -> str:
    """Normalize an OCR string into Turkish plate format "NN LLL NNNN".

    Strips spaces, upper-cases the text (so lower-case OCR output still
    matches), and checks the pattern 2 digits + 1-3 letters + 2-4 digits.

    Args:
        plate: raw OCR text for one plate crop.

    Returns:
        The three groups separated by single spaces, or "Unknown" when
        the text is not a valid plate.
    """
    # Upper-case before matching so lower-case OCR output passes [A-Z];
    # upper-case input behaves exactly as before.
    m = re.match(r"^(\d{2})([A-Z]{1,3})(\d{2,4})$", plate.replace(" ", "").upper())
    if m:
        return f"{m.group(1)} {m.group(2)} {m.group(3)}"
    return "Unknown"
29
+
30
# ─── 3) Single‐image inference ─────────────────────────────────────
def process_image(img_np, conf_thresh=0.25):
    """
    Detect plates, OCR them, draw boxes & return annotated image + status.

    Args:
        img_np: RGB numpy image from Gradio, or None when nothing uploaded.
        conf_thresh: YOLO confidence threshold (0..1).

    Returns:
        (annotated RGB numpy image or None, status string)
    """
    if img_np is None:
        # Gradio passes None when no image was uploaded; the original code
        # would crash in cvtColor here.
        return None, "❌ Upload an image."

    # Gradio delivers RGB; OpenCV works in BGR.
    img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
    # YOLO detection
    res = yolo_model(img_bgr, iou=0.3, conf=conf_thresh)[0]
    boxes = res.boxes.xyxy.cpu().numpy()
    scores = res.boxes.conf.cpu().numpy()

    annotated = img_bgr.copy()
    texts = []
    for (x1, y1, x2, y2), conf in zip(boxes, scores):
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
        crop = annotated[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        # OCR on the crop resized to the recognizer's 128x32 input size
        plate = cv2.resize(crop, (128, 32))
        rec = ocr_model.ocr(plate, cls=True)[0]
        if not rec:
            # PaddleOCR returns None/[] when it finds no text in the crop;
            # the original code crashed on join()/min() in that case.
            continue
        txt = "".join(seg[1][0] for seg in rec)
        formatted = format_turkish_plate(txt)
        # Keep the weakest segment confidence as the plate's OCR confidence.
        texts.append((formatted, float(min(seg[1][1] for seg in rec))))

        # draw box + label (label shows the YOLO detection confidence)
        label = f"{formatted} ({conf:.2f})"
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(annotated, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    # back to RGB for Gradio display
    annotated = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    status = f"{len(texts)} plate(s) detected"
    return annotated, status
66
+
67
# ─── 4) Video inference ───────────────────────────────────────────
def process_video(video_file, conf_thresh=0.25):
    """
    Run the detection + OCR pipeline frame by frame.

    Writes per-detection records to output.json and an annotated video to a
    temp file.

    Args:
        video_file: uploaded video — a gr.File wrapper or a path string.
        conf_thresh: YOLO confidence threshold (0..1).

    Returns:
        (annotated_video_path or None, status string) — two values, matching
        the two Gradio outputs ([out_vid, out_txt]) wired to this callback;
        the original single-value return made Gradio fail on output count.
    """
    if video_file is None:
        return None, "❌ Upload a video."
    # gr.File hands us a file wrapper; cv2.VideoCapture needs a path string.
    path = getattr(video_file, "name", video_file)

    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        return None, f"❌ Could not open video: {path}"
    # Some containers report fps == 0; fall back to a sane default to avoid
    # ZeroDivisionError below and a broken VideoWriter.
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(tmp_out, fourcc, fps, (w, h))

    json_out = []
    frame_i = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_i += 1
            t = frame_i / fps  # timestamp in seconds

            # detect & OCR
            res = yolo_model(frame, iou=0.3, conf=conf_thresh)[0]
            boxes = res.boxes.xyxy.cpu().numpy()
            for (x1, y1, x2, y2) in boxes:
                x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
                crop = frame[y1:y2, x1:x2]
                if crop.size == 0:
                    continue

                plate = cv2.resize(crop, (128, 32))
                rec = ocr_model.ocr(plate, cls=True)[0]
                if not rec:
                    # PaddleOCR returns None/[] when no text is found; the
                    # original code crashed on join()/min() here.
                    continue
                txt = "".join(seg[1][0] for seg in rec)
                formatted = format_turkish_plate(txt)
                conf_score = min(seg[1][1] for seg in rec)

                # Only log plates that parsed into the Turkish format
                if formatted != "Unknown":
                    json_out.append({
                        "time_s": round(t, 2),
                        "plate": formatted,
                        "conf": round(conf_score, 3)
                    })

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, formatted, (x1, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            writer.write(frame)
    finally:
        # Always release capture/writer, even if a frame raises, so the
        # output container is finalized.
        cap.release()
        writer.release()

    # save JSON
    with open("output.json", "w") as jf:
        json.dump(json_out, jf, indent=2)

    return tmp_out, f"{len(json_out)} detection(s) saved to output.json"
125
+
126
# ─── 5) Gradio UI ────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("## πŸš— Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            inp_img = gr.Image(type="numpy", label="Upload Image")
            inp_vid = gr.File(label="Upload Video (.mp4)")
            conf = gr.Slider(0, 1, 0.25, 0.01, label="YOLO Confidence")  # min, max, default, step
            btn_img = gr.Button("Run Image")
            btn_vid = gr.Button("Run Video")
        with gr.Column():
            out_img = gr.Image(type="numpy", label="Annotated Image")
            out_vid = gr.Video(label="Annotated Video")
            out_txt = gr.Textbox(label="Status / JSON Path")

    # process_image returns (image, status) matching the two outputs
    btn_img.click(process_image, [inp_img, conf], [out_img, out_txt])
    # NOTE(review): this click lists two outputs, but process_video as
    # written returns a single path — Gradio will error on the output
    # count; process_video should return (video_path, status).
    btn_vid.click(process_video, [inp_vid, conf], [out_vid, out_txt])

if __name__ == "__main__":
    demo.launch()
146
+