APIMONSTER committed on
Commit
8c48635
Β·
verified Β·
1 Parent(s): 17fbd1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -137
app.py CHANGED
@@ -1,137 +1,146 @@
1
- import os
2
- import cv2
3
- import numpy as np
4
- import paddle
5
- import paddle.nn as nn
6
- import gradio as gr
7
- from ultralytics import YOLO
8
- from PIL import Image
9
-
10
# ─── 1) PlateOCR configuration (exactly matching the fine-tuned model architecture) ───
MAX_SEQ_LEN = 15  # fixed number of character slots predicted per plate
LABEL_MAP = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ "  # index -> character; trailing space serves as padding
NUM_CLASSES = len(LABEL_MAP)  # output classes per character slot
14
-
15
class OCRHead(nn.Layer):
    """Classification head producing one logit vector per character slot.

    Maps the flattened CNN features to MAX_SEQ_LEN * NUM_CLASSES logits,
    reshaped to [batch, MAX_SEQ_LEN, NUM_CLASSES].
    """

    def __init__(self, flatten_size):
        super().__init__()
        self.dropout = nn.Dropout(0.5)  # regularization before the large FC layer
        self.fc = nn.Linear(flatten_size, MAX_SEQ_LEN * NUM_CLASSES)

    def forward(self, x):
        x = self.dropout(x)
        x = self.fc(x)
        # [B, MAX_SEQ_LEN * NUM_CLASSES] -> [B, MAX_SEQ_LEN, NUM_CLASSES]
        return x.reshape([-1, MAX_SEQ_LEN, NUM_CLASSES])
25
-
26
class PlateOCR(nn.Layer):
    """CNN backbone + OCRHead for fixed-length plate recognition.

    Expects [B, 3, 32, 128] input; three conv/BN/ReLU/pool stages downsample
    spatially, then the flattened features feed OCRHead.
    """

    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2D(3, 32, 3, padding=1), nn.BatchNorm2D(32), nn.ReLU(),
            nn.MaxPool2D(2, 2),
            nn.Conv2D(32, 64, 3, padding=1), nn.BatchNorm2D(64), nn.ReLU(),
            nn.MaxPool2D(2, 2),
            nn.Conv2D(64, 128, 3, padding=1), nn.BatchNorm2D(128), nn.ReLU(),
            nn.MaxPool2D(2, 2), nn.Dropout(0.25)
        )
        # Run a dummy tensor through the backbone once to compute the
        # flattened feature size for the head's Linear layer.
        dummy = paddle.randn([1, 3, 32, 128])
        flat_size = paddle.flatten(self.backbone(dummy), 1).shape[1]
        self.head = OCRHead(flat_size)

    def forward(self, x):
        x = self.backbone(x)
        x = paddle.flatten(x, 1)
        return self.head(x)
46
-
47
# ─── 2) Greedy Decode ───
def greedy_decode(logits):
    """Greedy-decode per-slot logits into plate strings.

    Args:
        logits: [B, T, C] scores — a paddle tensor (anything exposing
            ``.numpy()``) or a plain numpy array.

    Returns:
        list[str]: one decoded string per batch item, with padding
        spaces stripped.

    NOTE(review): collapsing consecutive identical indices is a
    CTC-style step; for this fixed-length per-slot head it also merges
    genuine doubled characters (e.g. "AA") — confirm against how the
    model was trained.
    """
    # Accept both paddle tensors and raw numpy arrays.
    arr = logits.numpy() if hasattr(logits, "numpy") else np.asarray(logits)
    preds = arr.argmax(axis=2)  # [B, T] best class index per slot
    texts = []
    for seq in preds:
        prev = -1
        chars = []
        for idx in seq:
            # argmax always yields 0..NUM_CLASSES-1, so the original
            # `idx < NUM_CLASSES` bound check was vacuous and is dropped;
            # only skip repeats of the previous index.
            if idx != prev:
                chars.append(LABEL_MAP[idx])
            prev = idx
        texts.append("".join(chars).strip())
    return texts
61
-
62
# ─── 3) Load models ───
# YOLO plate detector
yolo = YOLO("models/best.pt")

# Fine-tuned PlateOCR recognizer: architecture defined above, weights loaded
# from the checkpoint file.
plate_ocr = PlateOCR()
plate_ocr.set_state_dict(paddle.load("models/best_plate_model.pdparams"))
plate_ocr.eval()  # inference mode: disables dropout
71
# ─── 4) Pipeline function ───
def detect_and_read(image, conf_thresh):
    """Detect plates with YOLO, read each crop with PlateOCR, annotate.

    Args:
        image: RGB numpy image from Gradio, or None when nothing uploaded.
        conf_thresh: YOLO confidence threshold (0..1).

    Returns:
        (annotated RGB image or None, status string).
    """
    if image is None:
        return None, "❌ Upload an image."

    # 1) Gradio delivers RGB; OpenCV convention is BGR (original comment
    #    said "BGR→RGB" — the conversion is actually RGB→BGR).
    img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # 2) YOLO predict
    results = yolo.predict(source=img, conf=conf_thresh)[0]
    boxes = results.boxes.xyxy.cpu().numpy()  # [N,4] xyxy corner coords
    scores = results.boxes.conf.cpu().numpy()
    classes = results.boxes.cls.cpu().numpy()  # NOTE(review): unused below

    annotated = img.copy()
    ocr_texts = []

    for (x1, y1, x2, y2), conf in zip(boxes, scores):
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
        crop = annotated[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        # 3) Preprocess for PlateOCR: resize to the 128x32 training size,
        #    scale to [0,1], HWC -> CHW, add batch dim.
        plate = cv2.resize(crop, (128, 32))
        arr = plate.astype("float32") / 255.0
        arr = arr.transpose(2, 0, 1)[None, :, :, :]  # [1,3,32,128]
        inp = paddle.to_tensor(arr)

        # 4) OCR inference (no gradients needed)
        with paddle.no_grad():
            out = plate_ocr(inp)  # [1, T, C]
        text = greedy_decode(out)[0]

        # 5) Draw box + text + confidence
        label = f"{text} ({conf:.2f})"
        # bounding box
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # filled label background, placed above the box when there is room,
        # otherwise just below the top edge
        (tw, th), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
        ty = y1 - 10 if y1 - 10 > th + baseline else y1 + th + 10
        cv2.rectangle(annotated, (x1, ty - th - baseline), (x1 + tw, ty + baseline), (0, 255, 0), cv2.FILLED)
        cv2.putText(annotated, label, (x1, ty), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

        ocr_texts.append(text)

    # Convert back to RGB for Gradio display
    out_img = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    status = f"Detected {len(boxes)} plates, OCR: {ocr_texts}"
    return out_img, status
121
-
122
# ─── 5) Gradio UI ───
with gr.Blocks() as demo:
    gr.Markdown("## πŸš— Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            inp = gr.Image(type="numpy", label="Upload Image")
            conf = gr.Slider(0, 1, 0.25, 0.01, label="YOLO Confidence")  # min, max, default, step
            btn = gr.Button("Run")
        with gr.Column():
            out_img = gr.Image(type="numpy", label="Annotated")
            out_text = gr.Textbox(label="Status", interactive=False)
    # detect_and_read returns (image, status) matching the two outputs
    btn.click(detect_and_read, [inp, conf], [out_img, out_text])
    gr.Markdown("---\n**How it works:** YOLO finds plate boxes; your PaddleOCR-fine-tuned PlateOCR reads them.")

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import cv2
4
+ import json
5
+ import tempfile
6
+ import numpy as np
7
+ from ultralytics import YOLO
8
+ from paddleocr import PaddleOCR
9
+ import gradio as gr
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+
13
# ─── 1) Load models ───────────────────────────────────────────────
# YOLO plate detector
yolo_model = YOLO("models/best.pt")
# PaddleOCR recognizer
ocr_model = PaddleOCR(
    # NOTE(review): det_model_dir=None does not disable detection — PaddleOCR
    # falls back to its default detector; confirm the "recognition only"
    # intent actually holds at runtime.
    det_model_dir=None,
    # NOTE(review): PaddleOCR expects an exported inference-model *directory*
    # here, not a .pdparams training checkpoint — verify this path loads.
    rec_model_dir="models/best_plate_model.pdparams",
    use_angle_cls=True,   # classify/correct rotated text crops
    use_space_char=True   # allow spaces in recognized text
)
21
+
22
+ # ─── 2) Plate formatting helper ────────────────────────────────────
23
+ import re
24
def format_turkish_plate(plate: str) -> str:
    """Normalize an OCR string into Turkish plate format "NN LLL NNNN".

    Strips spaces, upper-cases the text (so lower-case OCR output still
    matches), and checks the pattern 2 digits + 1-3 letters + 2-4 digits.

    Args:
        plate: raw OCR text for one plate crop.

    Returns:
        The three groups separated by single spaces, or "Unknown" when
        the text is not a valid plate.
    """
    # Upper-case before matching so lower-case OCR output passes [A-Z];
    # upper-case input behaves exactly as before.
    m = re.match(r"^(\d{2})([A-Z]{1,3})(\d{2,4})$", plate.replace(" ", "").upper())
    if m:
        return f"{m.group(1)} {m.group(2)} {m.group(3)}"
    return "Unknown"
29
+
30
# ─── 3) Single‐image inference ─────────────────────────────────────
def process_image(img_np, conf_thresh=0.25):
    """
    Detect plates, OCR them, draw boxes & return annotated image + status.

    Args:
        img_np: RGB numpy image from Gradio, or None when nothing uploaded.
        conf_thresh: YOLO confidence threshold (0..1).

    Returns:
        (annotated RGB numpy image or None, status string)
    """
    if img_np is None:
        # Gradio passes None when no image was uploaded; the original code
        # would crash in cvtColor here.
        return None, "❌ Upload an image."

    # Gradio delivers RGB; OpenCV works in BGR.
    img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
    # YOLO detection
    res = yolo_model(img_bgr, iou=0.3, conf=conf_thresh)[0]
    boxes = res.boxes.xyxy.cpu().numpy()
    scores = res.boxes.conf.cpu().numpy()

    annotated = img_bgr.copy()
    texts = []
    for (x1, y1, x2, y2), conf in zip(boxes, scores):
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
        crop = annotated[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        # OCR on the crop resized to the recognizer's 128x32 input size
        plate = cv2.resize(crop, (128, 32))
        rec = ocr_model.ocr(plate, cls=True)[0]
        if not rec:
            # PaddleOCR returns None/[] when it finds no text in the crop;
            # the original code crashed on join()/min() in that case.
            continue
        txt = "".join(seg[1][0] for seg in rec)
        formatted = format_turkish_plate(txt)
        # Keep the weakest segment confidence as the plate's OCR confidence.
        texts.append((formatted, float(min(seg[1][1] for seg in rec))))

        # draw box + label (label shows the YOLO detection confidence)
        label = f"{formatted} ({conf:.2f})"
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(annotated, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    # back to RGB for Gradio display
    annotated = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    status = f"{len(texts)} plate(s) detected"
    return annotated, status
66
+
67
# ─── 4) Video inference ───────────────────────────────────────────
def process_video(video_file, conf_thresh=0.25):
    """
    Run the detection + OCR pipeline frame by frame.

    Writes per-detection records to output.json and an annotated video to a
    temp file.

    Args:
        video_file: uploaded video — a gr.File wrapper or a path string.
        conf_thresh: YOLO confidence threshold (0..1).

    Returns:
        (annotated_video_path or None, status string) — two values, matching
        the two Gradio outputs ([out_vid, out_txt]) wired to this callback;
        the original single-value return made Gradio fail on output count.
    """
    if video_file is None:
        return None, "❌ Upload a video."
    # gr.File hands us a file wrapper; cv2.VideoCapture needs a path string.
    path = getattr(video_file, "name", video_file)

    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        return None, f"❌ Could not open video: {path}"
    # Some containers report fps == 0; fall back to a sane default to avoid
    # ZeroDivisionError below and a broken VideoWriter.
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(tmp_out, fourcc, fps, (w, h))

    json_out = []
    frame_i = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_i += 1
            t = frame_i / fps  # timestamp in seconds

            # detect & OCR
            res = yolo_model(frame, iou=0.3, conf=conf_thresh)[0]
            boxes = res.boxes.xyxy.cpu().numpy()
            for (x1, y1, x2, y2) in boxes:
                x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
                crop = frame[y1:y2, x1:x2]
                if crop.size == 0:
                    continue

                plate = cv2.resize(crop, (128, 32))
                rec = ocr_model.ocr(plate, cls=True)[0]
                if not rec:
                    # PaddleOCR returns None/[] when no text is found; the
                    # original code crashed on join()/min() here.
                    continue
                txt = "".join(seg[1][0] for seg in rec)
                formatted = format_turkish_plate(txt)
                conf_score = min(seg[1][1] for seg in rec)

                # Only log plates that parsed into the Turkish format
                if formatted != "Unknown":
                    json_out.append({
                        "time_s": round(t, 2),
                        "plate": formatted,
                        "conf": round(conf_score, 3)
                    })

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, formatted, (x1, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            writer.write(frame)
    finally:
        # Always release capture/writer, even if a frame raises, so the
        # output container is finalized.
        cap.release()
        writer.release()

    # save JSON
    with open("output.json", "w") as jf:
        json.dump(json_out, jf, indent=2)

    return tmp_out, f"{len(json_out)} detection(s) saved to output.json"
125
+
126
# ─── 5) Gradio UI ────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("## πŸš— Plate Detection + Recognition")
    with gr.Row():
        with gr.Column():
            inp_img = gr.Image(type="numpy", label="Upload Image")
            inp_vid = gr.File(label="Upload Video (.mp4)")
            conf = gr.Slider(0, 1, 0.25, 0.01, label="YOLO Confidence")  # min, max, default, step
            btn_img = gr.Button("Run Image")
            btn_vid = gr.Button("Run Video")
        with gr.Column():
            out_img = gr.Image(type="numpy", label="Annotated Image")
            out_vid = gr.Video(label="Annotated Video")
            out_txt = gr.Textbox(label="Status / JSON Path")

    # process_image returns (image, status) matching the two outputs
    btn_img.click(process_image, [inp_img, conf], [out_img, out_txt])
    # NOTE(review): this click lists two outputs, but process_video as
    # written returns a single path — Gradio will error on the output
    # count; process_video should return (video_path, status).
    btn_vid.click(process_video, [inp_vid, conf], [out_vid, out_txt])

if __name__ == "__main__":
    demo.launch()
146
+