Abs6187 committed on
Commit
97a4d76
·
verified ·
1 Parent(s): dad77f7

Add billboard detector MVP

Browse files
Files changed (1) hide show
  1. billboard_detector.py +138 -0
billboard_detector.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fast MVP: Billboard/Hoarding Detection + Visibility Scoring + Video Support
3
+ Uses pre-trained YOLO11-nano (5MB) for billboard detection.
4
+ Adds human-attention-like visibility scoring (size + centrality + contrast).
5
+ """
6
+ import argparse, os, cv2, numpy as np, torch
7
+ from pathlib import Path
8
+ from huggingface_hub import hf_hub_download
9
+ from ultralytics import YOLO
10
+
11
+ REPO_ID = "maco018/billboard-detection-Yolo11"  # Hugging Face Hub repo id passed to hf_hub_download
12
+ MODEL_FILE = "yolo11n.pt" # weights filename requested from REPO_ID; nano = fastest, smallest
13
+
14
+
15
def download_model():
    """Fetch the pre-trained billboard detector weights from the Hugging Face Hub.

    Announces the download on stdout, then asks ``hf_hub_download`` for
    ``MODEL_FILE`` inside ``REPO_ID``.

    Returns:
        The value returned by ``hf_hub_download`` (the location of the
        downloaded weights file), which ``main`` hands to ``YOLO``.
    """
    print("Downloading pre-trained billboard detector...")
    weights_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
    return weights_path
18
+
19
+
20
def visibility_score(img, box):
    """
    Mimic human attention / visibility for a detected billboard.

    The score is a weighted sum of three heuristics, each in [0, 1]:

    * size (weight 0.40): box area relative to half the image area, capped
      at 1.0, so a box covering 50% of the frame or more scores full marks.
    * centrality (weight 0.35): 1.0 when the box centre sits on the image
      centre, falling linearly to 0.0 at the image corners.
    * contrast (weight 0.25): absolute difference between the mean gray
      level of the box and of the whole image, divided by 255, plus a 0.3
      floor, capped at 1.0. A box with no pixels gets a neutral 0.5.

    The box is clamped to the image before scoring, so detections that
    stick out of the frame (or have swapped corners) cannot push the result
    outside the documented range or wrap around through negative indices.

    Args:
        img: Image array, either (H, W, C) in BGR channel order as returned
            by ``cv2.imread`` or an already-gray (H, W) array.
        box: Iterable ``(x1, y1, x2, y2)`` of pixel coordinates.

    Returns:
        Visibility score as a Python float in [0, 1]. A zero-sized image
        yields 0.0.
    """
    h, w = img.shape[:2]
    if h == 0 or w == 0:
        # Nothing to look at; also avoids dividing by a zero image area.
        return 0.0

    # Clamp to the frame so every component below stays within [0, 1].
    x1, y1, x2, y2 = (float(v) for v in box)
    x1, x2 = min(max(x1, 0.0), w), min(max(x2, 0.0), w)
    y1, y2 = min(max(y1, 0.0), h), min(max(y2, 0.0), h)

    # Inverted boxes collapse to zero area instead of a spurious positive one.
    area = max(x2 - x1, 0.0) * max(y2 - y1, 0.0)
    size_score = min(area / (h * w * 0.5), 1.0)  # bigger = more visible, cap at 50% image

    # Center bias (human attention peaks near center).
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
    dx = abs(cx - w / 2) / (w / 2)
    dy = abs(cy - h / 2) / (h / 2)
    centrality = 1.0 - (dx + dy) / 2.0

    # Brightness contrast of the box against the whole image.
    ix1, iy1, ix2, iy2 = int(x1), int(y1), int(x2), int(y2)
    if ix2 <= ix1 or iy2 <= iy1:
        contrast = 0.5
    else:
        # Gray conversion is per-pixel, so converting once and slicing gives
        # the same ROI mean as converting the ROI separately.
        gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        roi_mean = float(gray[iy1:iy2, ix1:ix2].mean())
        full_mean = float(gray.mean())
        contrast = min(abs(roi_mean - full_mean) / 255.0 + 0.3, 1.0)

    return float(0.4 * size_score + 0.35 * centrality + 0.25 * contrast)
48
+
49
+
50
def _visibility_color(vis):
    """Map a visibility score to a BGR colour: green > 0.6, orange > 0.4, else red."""
    if vis > 0.6:
        return (0, 255, 0)
    if vis > 0.4:
        return (0, 165, 255)
    return (0, 0, 255)


def draw_results(img, boxes, classes, scores, confs, vis_scores):
    """Draw labelled detection boxes on a copy of ``img``.

    Each detection gets a 2-pixel rectangle and a text label of the form
    ``"<class>: conf=<c> vis=<v>"``, both coloured by visibility score
    (see ``_visibility_color``). The input image is not modified.

    Args:
        img: Image array to annotate.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes; values are truncated
            to ints for drawing.
        classes: Class label per box.
        scores: Unused. Kept so existing positional callers (which pass
            ``None``) keep working.
        confs: Detection confidence per box.
        vis_scores: Visibility score per box.

    Returns:
        The annotated copy of ``img``. Iteration stops at the shortest of
        ``boxes``, ``classes``, ``confs`` and ``vis_scores``.
    """
    out = img.copy()
    for box, cls, conf, vis in zip(boxes, classes, confs, vis_scores):
        x1, y1, x2, y2 = map(int, box)
        color = _visibility_color(vis)
        cv2.rectangle(out, (x1, y1), (x2, y2), color, 2)
        label = f"{cls}: conf={conf:.2f} vis={vis:.2f}"
        # Keep the text baseline at least 20 px down so labels for boxes
        # touching the top edge are not drawn above the frame.
        cv2.putText(out, label, (x1, max(y1 - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    return out
60
+
61
+
62
def process_image(model, img_path, out_dir):
    """Run the detector on one image and save an annotated copy.

    When nothing is detected the unmodified image is written as
    ``<stem>_nodet.jpg``; otherwise the annotated image is written as
    ``<stem>_result.jpg`` and a per-detection summary is printed.
    Load and write failures are reported on stdout rather than raised.

    Args:
        model: Callable detector; invoked as ``model(img, verbose=False)``
            and expected to return a sequence whose first item exposes
            ``boxes`` and ``names``.
        img_path: Path of the image to read.
        out_dir: Directory for the output file (``str`` or ``Path``).

    Returns:
        None.
    """
    out_dir = Path(out_dir)  # accept plain strings as well as Path objects
    stem = Path(img_path).stem

    img = cv2.imread(str(img_path))
    if img is None:
        print(f"Failed to load {img_path}")
        return

    r = model(img, verbose=False)[0]
    if r.boxes is None or len(r.boxes) == 0:
        print(f"No billboards in {img_path}")
        nodet_path = out_dir / f"{stem}_nodet.jpg"
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(str(nodet_path), img):
            print(f"Failed to write {nodet_path}")
        return

    boxes = r.boxes.xyxy.cpu().numpy()
    classes = [r.names[int(c)] for c in r.boxes.cls.cpu().numpy()]
    confs = r.boxes.conf.cpu().numpy()
    vis_scores = [visibility_score(img, b) for b in boxes]

    out = draw_results(img, boxes, classes, None, confs, vis_scores)
    out_path = out_dir / f"{stem}_result.jpg"
    if not cv2.imwrite(str(out_path), out):
        # Do not claim success when nothing reached the disk.
        print(f"Failed to write {out_path}")
        return

    print(f"Saved {out_path} | Detections: {len(boxes)}")
    for c, conf, vis in zip(classes, confs, vis_scores):
        print(f" -> {c}: conf={conf:.2f}, visibility={vis:.2f}")
83
+
84
+
85
def process_video(model, video_path, out_dir, sample_every=5):
    """Run the detector on every Nth frame of a video and save the result.

    Only sampled frames are written, annotated when detections exist, to
    ``<stem>_result.mp4`` (``mp4v`` codec). The output frame rate is the
    source rate divided by ``sample_every`` so playback duration is
    preserved. Failures to open the input or create the output are
    reported on stdout rather than raised.

    Args:
        model: Callable detector; invoked as ``model(frame, verbose=False)``
            and expected to return a sequence whose first item exposes
            ``boxes`` and ``names``.
        video_path: Path of the video to read.
        out_dir: Directory for the output file (``str`` or ``Path``).
        sample_every: Process one frame out of this many; must be >= 1.

    Returns:
        None.

    Raises:
        ValueError: If ``sample_every`` is smaller than 1.
    """
    if sample_every < 1:
        # Would otherwise surface as ZeroDivisionError or a negative frame rate.
        raise ValueError(f"sample_every must be >= 1, got {sample_every}")

    out_dir = Path(out_dir)  # accept plain strings as well as Path objects
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        cap.release()
        print(f"Failed to open {video_path}")
        return

    writer = None
    processed = 0
    try:
        fps = cap.get(cv2.CAP_PROP_FPS) or 25  # fall back when the container reports 0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out_path = out_dir / f"{Path(video_path).stem}_result.mp4"
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(str(out_path), fourcc, fps / sample_every, (w, h))
        if not writer.isOpened():
            print(f"Failed to create {out_path}")
            return

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % sample_every == 0:
                r = model(frame, verbose=False)[0]
                if r.boxes is not None and len(r.boxes) > 0:
                    boxes = r.boxes.xyxy.cpu().numpy()
                    classes = [r.names[int(c)] for c in r.boxes.cls.cpu().numpy()]
                    confs = r.boxes.conf.cpu().numpy()
                    vis_scores = [visibility_score(frame, b) for b in boxes]
                    frame = draw_results(frame, boxes, classes, None, confs, vis_scores)
                writer.write(frame)
                # Count frames actually written; frame_idx // sample_every
                # under-counts whenever the total is not a multiple of the step.
                processed += 1
            frame_idx += 1
    finally:
        # Release handles even if inference or drawing raises mid-stream.
        cap.release()
        if writer is not None:
            writer.release()

    print(f"Saved video: {out_path} ({processed} frames processed)")
114
+
115
+
116
def main():
    """Command-line entry point: detect billboards in one image or video.

    Parses the arguments, creates the output directory (including missing
    parents), downloads and loads the model, then dispatches on the input
    file's extension: known video suffixes go to ``process_video``,
    everything else to ``process_image``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Image or video path")
    parser.add_argument("--out", default="outputs", help="Output directory")
    parser.add_argument("--video-skip", type=int, default=5, help="Process every Nth frame for video")
    args = parser.parse_args()
    if args.video_skip < 1:
        # Reject early with a usage message instead of failing mid-run.
        parser.error("--video-skip must be >= 1")

    out_dir = Path(args.out)
    # parents=True lets nested paths such as "runs/exp1" be created in one go.
    out_dir.mkdir(parents=True, exist_ok=True)

    model_path = download_model()
    model = YOLO(model_path)
    # Report only after the weights have actually been loaded.
    print(f"Model loaded: {model_path}")

    video_suffixes = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
    inp = Path(args.input)
    if inp.suffix.lower() in video_suffixes:
        process_video(model, inp, out_dir, args.video_skip)
    else:
        process_image(model, inp, out_dir)
135
+
136
+
137
+ if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
138
+ main()