# DeepSight / utils.py
# Repository page header: ShadowGard3n · "First commit" · 36e7618
import cv2
import numpy as np
from ultralytics import YOLO
import easyocr
import os
import uuid
# =========================
# Load YOLO model
# =========================
# Location of the detector weights (a relative path).
MODEL_PATH = "weights/best.pt"
if not os.path.exists(MODEL_PATH):
    # Only reports the problem: execution continues and YOLO() below is still
    # called with the same path.
    print(f"❌ ERROR: Model weights not found at {MODEL_PATH}")
# Shared detector instance used by process_image and process_video_stream.
model = YOLO(MODEL_PATH)
# =========================
# EasyOCR Init
# =========================
# Shared OCR reader, created once at import time for the 'en' language list.
reader = easyocr.Reader(['en'], gpu=False) # created with gpu=False
# =========================
# Debug folder
# =========================
# Directory that get_ocr_text writes its crops into; created at import time
# if it does not already exist.
DEBUG_DIR = "debug_crops"
os.makedirs(DEBUG_DIR, exist_ok=True)
# =========================
# OCR FUNCTION (EasyOCR)
# =========================
def _reading_order(segments):
    """Arrange OCR segments row by row, left to right within each row.

    Args:
        segments: List of ``(top, bottom, left, text)`` tuples.

    Returns:
        The same tuples in reading order. A segment joins the current row
        when its vertical centre lies above the bottom edge of the segment
        that opened the row; otherwise it opens a new row.
    """
    rows = []
    for segment in sorted(segments, key=lambda s: s[0]):
        top, bottom = segment[0], segment[1]
        if rows and (top + bottom) / 2 < rows[-1][0][1]:
            rows[-1].append(segment)
        else:
            rows.append([segment])
    # Sorting by the top edge alone would let a one-pixel difference between
    # two fragments of the same printed line swap them, so order by x inside
    # each row.
    return [seg for row in rows for seg in sorted(row, key=lambda s: s[2])]


def get_ocr_text(crop, crop_id):
    """Run OCR on a cropped region and return its alphanumeric text.

    The crop is upscaled 2x with bicubic interpolation, saved to
    ``DEBUG_DIR/<crop_id>.jpg`` and passed to the module-level EasyOCR
    ``reader``. Each recognised segment is stripped to its alphanumeric
    characters; segments shorter than two characters are discarded. The
    remaining segments are concatenated top row first, left to right within
    a row.

    Args:
        crop: Image array to read, or ``None``. Presumably BGR channel order,
            since it is converted with ``COLOR_BGR2RGB`` -- confirm with the
            callers.
        crop_id: Identifier used for the debug file name and the log line.

    Returns:
        ``(text, confidence)`` where ``confidence`` is the mean confidence of
        the kept segments as a ``float``. ``(None, 0.0)`` when the crop is
        missing or empty, when no segment survives filtering, or when any
        exception is raised while processing (the error is printed).
    """
    try:
        if crop is None or crop.size == 0:
            return None, 0.0

        crop = cv2.resize(crop, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
        crop_rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
        # The debug image is the upscaled crop in its original channel order.
        cv2.imwrite(f"{DEBUG_DIR}/{crop_id}.jpg", crop)

        segments = []
        confidences = []
        for bbox, text, conf in reader.readtext(crop_rgb):
            clean = "".join(c for c in text if c.isalnum())
            if len(clean) < 2:  # still allows small parts like "L8"
                continue
            xs = [point[0] for point in bbox]
            ys = [point[1] for point in bbox]
            segments.append((min(ys), max(ys), min(xs), clean))
            confidences.append(conf)

        if not segments:
            return None, 0.0

        final_text = "".join(seg[3] for seg in _reading_order(segments))
        avg_conf = sum(confidences) / len(confidences)
        print(f"βœ… OCR [{crop_id}] -> {final_text}")
        return final_text, float(avg_conf)
    except Exception as e:
        # Best-effort by design: a failed crop must not abort the caller's loop.
        print(f"OCR Error: {e}")
        return None, 0.0
# =========================
# IMAGE PROCESSING
# =========================
def process_image(image):
    """Detect plates in one image and OCR every detection.

    Runs the module-level YOLO ``model`` on ``image``, cuts out each detected
    box with a 15-pixel margin clamped to the image bounds, and hands the
    crop to ``get_ocr_text``.

    Args:
        image: Image array indexed ``[row, column, channel]``; its ``shape``
            must unpack into exactly three values.

    Returns:
        A list of dicts with the keys ``"text"``, ``"confidence"`` and
        ``"debug_id"``, one for each detection whose OCR produced text.
    """
    print(f"\n[STEP 1] Running YOLO detection...")
    detections = model(image, imgsz=320, verbose=False)

    print(f"[STEP 2] YOLO found {len(detections[0].boxes)} bounding boxes.")

    margin = 15
    plates = []
    for detection in detections:
        if not detection.boxes:
            continue

        for coords in detection.boxes.xyxy.cpu().numpy():
            crop_id = f"plate_{uuid.uuid4().hex[:6]}"
            left, top, right, bottom = (int(value) for value in coords)

            # Widen the box so characters touching its edge are not cut off,
            # without stepping outside the image.
            height, width, _ = image.shape
            row_start = max(0, top - margin)
            row_stop = min(height, bottom + margin)
            col_start = max(0, left - margin)
            col_stop = min(width, right + margin)
            crop = image[row_start:row_stop, col_start:col_stop]

            print(f"[STEP 3] Processing {crop_id}...")
            text, conf = get_ocr_text(crop, crop_id)

            if not text:
                print(f" ⚠️ OCR failed for {crop_id}")
                continue

            plates.append({
                "text": text,
                "confidence": conf,
                "debug_id": crop_id
            })
    return plates
# =========================
# VIDEO PROCESSING
# =========================
def process_video_stream(video_path):
    """Track plates through a video and return the distinct texts read.

    Every frame is passed to ``model.track``. For each track id that has no
    accepted text yet, the box is cropped with a 15-pixel margin clamped to
    the frame and sent to ``get_ocr_text``. A reading is accepted when it has
    at least five characters; until then the same track is retried on later
    frames.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.

    Returns:
        A list of the accepted plate strings without duplicates, in the order
        they were first recognised. Empty when the capture cannot be opened
        or nothing is recognised.
    """
    print(f"\nπŸŽ₯ Processing video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    tracked_plates = {}  # track id -> accepted text, in insertion order
    pad = 15  # same margin process_image applies before OCR

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model.track(frame, persist=True, imgsz=320, verbose=False)
            detections = results[0].boxes
            if detections.id is None:
                continue

            ids = detections.id.int().cpu().tolist()
            boxes = detections.xyxy.cpu().numpy()
            h, w = frame.shape[:2]

            for box, tid in zip(boxes, ids):
                if tid in tracked_plates:
                    continue

                x1, y1, x2, y2 = map(int, box)
                # Clamp to the frame: a negative start index would otherwise
                # wrap around and slice from the opposite edge.
                crop = frame[
                    max(0, y1 - pad):min(h, y2 + pad),
                    max(0, x1 - pad):min(w, x2 + pad)
                ]
                text, _conf = get_ocr_text(crop, f"track_{tid}")

                if text and len(text) >= 5:
                    print(f"βœ… Detected Plate: {text}")
                    tracked_plates[tid] = text
    finally:
        # Release the capture even when tracking or OCR raises.
        cap.release()

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(tracked_plates.values()))