Abs6187
/

billboard-hoarding-mvp

Model card Files Files and versions

xet

Community

Abs6187 committed about 1 month ago

Commit

97a4d76

verified ·

1 Parent(s): dad77f7

Add billboard detector MVP

Browse files

Files changed (1) hide show

billboard_detector.py +138 -0

billboard_detector.py ADDED Viewed

	@@ -0,0 +1,138 @@

+"""
+Fast MVP: Billboard/Hoarding Detection + Visibility Scoring + Video Support
+Uses pre-trained YOLO11-nano (5MB) for billboard detection.
+Adds human-attention-like visibility scoring (size + centrality + contrast).
+"""
+import argparse, os, cv2, numpy as np, torch
+from pathlib import Path
+from huggingface_hub import hf_hub_download
+from ultralytics import YOLO
+# Hugging Face Hub repository passed to hf_hub_download as repo_id.
+REPO_ID = "maco018/billboard-detection-Yolo11"
+# Weights file requested from REPO_ID (passed to hf_hub_download as filename).
+MODEL_FILE = "yolo11n.pt"  # nano = fastest, smallest
+def download_model():
+    """
+    Fetch the detector weights file MODEL_FILE from the REPO_ID Hub repository.
+    Returns the value produced by hf_hub_download, which main() hands to YOLO() as the model path.
+    """
+    print("Downloading pre-trained billboard detector...")
+    weights_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
+    return weights_path
+def visibility_score(img, box):
+    """
+    Mimic human attention / visibility for a detected billboard.
+    Factors: size (weight 0.40), centrality (0.35), brightness contrast (0.25).
+    The box is clipped to the image before scoring, so coordinates outside the
+    frame can neither wrap around as negative array indices nor push the
+    result out of range.
+    Args:
+        img: image array; shape[:2] gives (height, width). When the clipped box
+            covers at least one pixel the image is converted with
+            cv2.COLOR_BGR2GRAY, so it is expected to be a BGR image.
+        box: (x1, y1, x2, y2) corner coordinates in pixels.
+    Returns:
+        Python float in [0, 1].
+    """
+    h, w = img.shape[:2]
+    if h == 0 or w == 0:
+        return 0.0  # empty image: nothing is visible and the ratios below would divide by zero
+    x1, y1, x2, y2 = (float(v) for v in box)
+    # Clip to the frame and normalise corner order so width/height are never negative.
+    x1, x2 = sorted((min(max(x1, 0.0), float(w)), min(max(x2, 0.0), float(w))))
+    y1, y2 = sorted((min(max(y1, 0.0), float(h)), min(max(y2, 0.0), float(h))))
+    area = (x2 - x1) * (y2 - y1)
+    size_score = min(area / (h * w * 0.5), 1.0)  # bigger = more visible, cap at 50% image
+    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
+    # center bias (human attention peaks near center); dx, dy are in [0, 1] after clipping
+    dx = abs(cx - w / 2) / (w / 2)
+    dy = abs(cy - h / 2) / (h / 2)
+    centrality = 1.0 - (dx + dy) / 2.0
+    # brightness contrast: mean gray level inside the box vs. the whole image
+    top, bottom, left, right = int(y1), int(y2), int(x1), int(x2)
+    if bottom <= top or right <= left:
+        contrast = 0.5  # box covers no whole pixel: fall back to a neutral value
+    else:
+        # Convert once and slice; the conversion is per-pixel, so the ROI mean is unchanged.
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        roi_gray = float(gray[top:bottom, left:right].mean())
+        full_gray = float(gray.mean())
+        contrast = min(abs(roi_gray - full_gray) / 255.0 + 0.3, 1.0)
+    score = 0.4 * size_score + 0.35 * centrality + 0.25 * contrast
+    return float(min(max(score, 0.0), 1.0))
+def _visibility_color(vis):
+    """Pick the BGR box colour for a visibility score: green above 0.6, orange above 0.4, red otherwise."""
+    if vis > 0.6:
+        return (0, 255, 0)
+    if vis > 0.4:
+        return (0, 165, 255)
+    return (0, 0, 255)
+def draw_results(img, boxes, classes, scores, confs, vis_scores):
+    """
+    Draw one labelled rectangle per detection on a copy of img.
+    Args:
+        img: image array; it is copied, never modified in place.
+        boxes: iterable of (x1, y1, x2, y2) boxes; coordinates are truncated to int.
+        classes: class label per box, shown in the text label.
+        scores: unused; kept in the signature so existing callers keep working.
+        confs: detection confidence per box.
+        vis_scores: visibility score per box; selects the box colour.
+    Returns:
+        The annotated copy of img.
+    """
+    out = img.copy()
+    for box, cls, conf, vis in zip(boxes, classes, confs, vis_scores):
+        x1, y1, x2, y2 = map(int, box)
+        color = _visibility_color(vis)
+        cv2.rectangle(out, (x1, y1), (x2, y2), color, 2)
+        label = f"{cls}: conf={conf:.2f} vis={vis:.2f}"
+        # Keep the label baseline at y >= 20 so text for boxes near the top edge stays inside the image.
+        cv2.putText(out, label, (x1, max(y1 - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+    return out
+def process_image(model, img_path, out_dir):
+    """
+    Run the detector on one image and write an annotated copy into out_dir.
+    Writes "<stem>_result.jpg" when there are detections, otherwise the
+    unmodified image as "<stem>_nodet.jpg". Progress and per-detection
+    results are printed; nothing is returned.
+    Args:
+        model: callable invoked as model(img, verbose=False); the first result
+            must expose .boxes (xyxy, cls, conf) and .names.
+        img_path: path of the image to read with cv2.imread.
+        out_dir: existing output directory (str or Path).
+    """
+    img = cv2.imread(str(img_path))
+    if img is None:
+        print(f"Failed to load {img_path}")
+        return
+    out_dir = Path(out_dir)
+    stem = Path(img_path).stem
+    r = model(img, verbose=False)[0]
+    if r.boxes is None or len(r.boxes) == 0:
+        print(f"No billboards in {img_path}")
+        nodet_path = out_dir / f"{stem}_nodet.jpg"
+        # cv2.imwrite reports failure through its return value; do not ignore it.
+        if not cv2.imwrite(str(nodet_path), img):
+            print(f"Failed to write {nodet_path}")
+        return
+    boxes = r.boxes.xyxy.cpu().numpy()
+    classes = [r.names[int(c)] for c in r.boxes.cls.cpu().numpy()]
+    confs = r.boxes.conf.cpu().numpy()
+    vis_scores = [visibility_score(img, b) for b in boxes]
+    out = draw_results(img, boxes, classes, None, confs, vis_scores)
+    out_path = out_dir / f"{stem}_result.jpg"
+    if cv2.imwrite(str(out_path), out):
+        print(f"Saved {out_path} | Detections: {len(boxes)}")
+    else:
+        # Only claim "Saved" when the file was actually written.
+        print(f"Failed to write {out_path} | Detections: {len(boxes)}")
+    for c, conf, vis in zip(classes, confs, vis_scores):
+        print(f"  -> {c}: conf={conf:.2f}, visibility={vis:.2f}")
+def process_video(model, video_path, out_dir, sample_every=5):
+    """
+    Run the detector on every sample_every-th frame of a video and write the
+    annotated frames to "<stem>_result.mp4" in out_dir.
+    Only sampled frames are written, at fps / sample_every, so the output
+    keeps roughly the source duration.
+    Args:
+        model: callable invoked as model(frame, verbose=False); the first
+            result must expose .boxes (xyxy, cls, conf) and .names.
+        video_path: path of the video to open with cv2.VideoCapture.
+        out_dir: existing output directory (str or Path).
+        sample_every: process one frame out of this many; must be >= 1.
+    Raises:
+        ValueError: if sample_every is smaller than 1.
+    """
+    if sample_every < 1:
+        # 0 would divide by zero below and a negative value gives a negative frame rate.
+        raise ValueError(f"sample_every must be >= 1, got {sample_every}")
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        cap.release()
+        print(f"Failed to load {video_path}")
+        return
+    fps = cap.get(cv2.CAP_PROP_FPS) or 25  # fall back to 25 when the container reports 0
+    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    out_path = Path(out_dir) / f"{Path(video_path).stem}_result.mp4"
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    writer = cv2.VideoWriter(str(out_path), fourcc, fps / sample_every, (w, h))
+    if not writer.isOpened():
+        cap.release()
+        writer.release()
+        print(f"Failed to write {out_path}")
+        return
+    frame_idx = 0
+    written = 0  # counted directly: frame_idx // sample_every under-counts partial strides
+    try:
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            if frame_idx % sample_every == 0:
+                r = model(frame, verbose=False)[0]
+                if r.boxes is not None and len(r.boxes) > 0:
+                    boxes = r.boxes.xyxy.cpu().numpy()
+                    classes = [r.names[int(c)] for c in r.boxes.cls.cpu().numpy()]
+                    confs = r.boxes.conf.cpu().numpy()
+                    vis_scores = [visibility_score(frame, b) for b in boxes]
+                    frame = draw_results(frame, boxes, classes, None, confs, vis_scores)
+                writer.write(frame)
+                written += 1
+            frame_idx += 1
+    finally:
+        # Release both handles even if inference raises, so the output file is finalised.
+        cap.release()
+        writer.release()
+    print(f"Saved video: {out_path} ({written} frames processed)")
+def main():
+    """
+    Command-line entry point: parse arguments, load the detector, and process
+    the input as a video (by file extension) or as an image.
+    Arguments are validated before the model download so bad input fails fast;
+    argparse's parser.error prints usage and exits with status 2.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="Image or video path")
+    parser.add_argument("--out", default="outputs", help="Output directory")
+    parser.add_argument("--video-skip", type=int, default=5, help="Process every Nth frame for video")
+    args = parser.parse_args()
+    if args.video_skip < 1:
+        parser.error("--video-skip must be >= 1")
+    inp = Path(args.input)
+    if not inp.is_file():
+        parser.error(f"input file not found: {inp}")
+    out_dir = Path(args.out)
+    # parents=True lets --out point at a nested directory that does not exist yet.
+    out_dir.mkdir(parents=True, exist_ok=True)
+    model_path = download_model()
+    model = YOLO(model_path)
+    # Report success only after the model object has actually been constructed.
+    print(f"Model loaded: {model_path}")
+    if inp.suffix.lower() in {".mp4", ".avi", ".mov", ".mkv", ".webm"}:
+        process_video(model, inp, out_dir, args.video_skip)
+    else:
+        process_image(model, inp, out_dir)
+if __name__ == "__main__":
+    main()