File size: 15,633 Bytes
5cc06e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2b92f9
5cc06e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2b92f9
5cc06e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2b92f9
5cc06e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2b92f9
5cc06e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2b92f9
5cc06e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
"""
src/localization.py  β€”  Localization Strategy Library
=====================================================
Five strategies that decide WHERE to evaluate a recognition head.
The head stays the same β€” only the search method changes.

Strategies
----------
1. Exhaustive Sliding Window  β€” brute-force grid scan
2. Image Pyramid              β€” multi-scale resize + sliding window
3. Coarse-to-Fine Search      β€” two-pass hierarchical refinement
4. Contour Proposals          β€” edge-driven candidate regions
5. Template Matching          β€” OpenCV cross-correlation (no head)

Every function returns the same tuple:
    (detections, n_proposals, elapsed_ms, heatmap)
"""

import cv2
import numpy as np
import time


# ===================================================================
#  Shared utilities
# ===================================================================

def nms(dets, iou_thresh):
    """Greedy NMS on list of (x1, y1, x2, y2, label, conf)."""
    dets = sorted(dets, key=lambda d: d[5], reverse=True)
    keep = []
    while dets:
        best = dets.pop(0)
        keep.append(best)
        dets = [d for d in dets if _iou(best, d) < iou_thresh]
    return keep


def _iou(a, b):
    xi1, yi1 = max(a[0], b[0]), max(a[1], b[1])
    xi2, yi2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, xi2 - xi1) * max(0, yi2 - yi1)
    aa = (a[2] - a[0]) * (a[3] - a[1])
    ab = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (aa + ab - inter + 1e-6)


# ===================================================================
#  1. Exhaustive Sliding Window
# ===================================================================

def exhaustive_sliding_window(image, win_h, win_w, feature_fn, head,
                               stride, conf_thresh, nms_iou):
    """
    Brute-force grid scan.  Evaluates the head at **every** position
    spaced by *stride* pixels.
    """
    H, W = image.shape[:2]
    heatmap = np.zeros((H, W), dtype=np.float32)
    detections = []
    n_proposals = 0
    t0 = time.perf_counter()

    for y in range(0, H - win_h + 1, stride):
        for x in range(0, W - win_w + 1, stride):
            patch = image[y:y + win_h, x:x + win_w]
            feats = feature_fn(patch)
            label, conf = head.predict(feats)
            n_proposals += 1
            if label != "background":
                heatmap[y:y + win_h, x:x + win_w] = np.maximum(
                    heatmap[y:y + win_h, x:x + win_w], conf)
                if conf >= conf_thresh:
                    detections.append((x, y, x + win_w, y + win_h, label, conf))

    elapsed_ms = (time.perf_counter() - t0) * 1000
    if detections:
        detections = nms(detections, nms_iou)
    return detections, n_proposals, elapsed_ms, heatmap


# ===================================================================
#  2. Image Pyramid
# ===================================================================

def image_pyramid(image, win_h, win_w, feature_fn, head,
                  stride, conf_thresh, nms_iou,
                  scales=(0.5, 0.75, 1.0, 1.25, 1.5)):
    """
    Resize the image at several scales, run a sliding window at each
    level, and map detections back to original coordinates.
    Finds objects at sizes different from the training crop.
    """
    H, W = image.shape[:2]
    heatmap = np.zeros((H, W), dtype=np.float32)
    detections = []
    n_proposals = 0
    t0 = time.perf_counter()

    for scale in scales:
        sH, sW = int(H * scale), int(W * scale)
        if sH < win_h or sW < win_w:
            continue
        scaled = cv2.resize(image, (sW, sH))

        for y in range(0, sH - win_h + 1, stride):
            for x in range(0, sW - win_w + 1, stride):
                patch = scaled[y:y + win_h, x:x + win_w]
                feats = feature_fn(patch)
                label, conf = head.predict(feats)
                n_proposals += 1
                if label != "background":
                    # Map back to original image coordinates
                    ox  = int(x / scale)
                    oy  = int(y / scale)
                    ox2 = min(int((x + win_w) / scale), W)
                    oy2 = min(int((y + win_h) / scale), H)
                    heatmap[oy:oy2, ox:ox2] = np.maximum(
                        heatmap[oy:oy2, ox:ox2], conf)
                    if conf >= conf_thresh:
                        detections.append((ox, oy, ox2, oy2, label, conf))

    elapsed_ms = (time.perf_counter() - t0) * 1000
    if detections:
        detections = nms(detections, nms_iou)
    return detections, n_proposals, elapsed_ms, heatmap


# ===================================================================
#  3. Coarse-to-Fine Search
# ===================================================================

def coarse_to_fine(image, win_h, win_w, feature_fn, head,
                   fine_stride, conf_thresh, nms_iou,
                   coarse_factor=4, refine_radius=2):
    """
    Two-pass hierarchical search.

    Pass 1 β€” Scan at *coarse_factor Γ— fine_stride* to cheaply identify
             hot regions (using a relaxed threshold of 0.7 Γ— conf_thresh).
    Pass 2 β€” Re-scan **only** the neighbourhood of each hit at
             *fine_stride*, within *refine_radius* steps in each direction.
    """
    H, W = image.shape[:2]
    heatmap = np.zeros((H, W), dtype=np.float32)
    detections = []
    n_proposals = 0
    t0 = time.perf_counter()

    coarse_stride = fine_stride * coarse_factor

    # --- Pass 1: coarse ---
    hot_spots = []
    for y in range(0, H - win_h + 1, coarse_stride):
        for x in range(0, W - win_w + 1, coarse_stride):
            patch = image[y:y + win_h, x:x + win_w]
            feats = feature_fn(patch)
            label, conf = head.predict(feats)
            n_proposals += 1
            if label != "background" and conf >= conf_thresh * 0.7:
                hot_spots.append((x, y))
                heatmap[y:y + win_h, x:x + win_w] = np.maximum(
                    heatmap[y:y + win_h, x:x + win_w], conf)

    # --- Pass 2: fine around hot spots ---
    visited = set()
    for hx, hy in hot_spots:
        for dy in range(-refine_radius, refine_radius + 1):
            for dx in range(-refine_radius, refine_radius + 1):
                x = hx + dx * fine_stride
                y = hy + dy * fine_stride
                if (x, y) in visited:
                    continue
                if x < 0 or y < 0 or x + win_w > W or y + win_h > H:
                    continue
                visited.add((x, y))
                patch = image[y:y + win_h, x:x + win_w]
                feats = feature_fn(patch)
                label, conf = head.predict(feats)
                n_proposals += 1
                if label != "background":
                    heatmap[y:y + win_h, x:x + win_w] = np.maximum(
                        heatmap[y:y + win_h, x:x + win_w], conf)
                    if conf >= conf_thresh:
                        detections.append((x, y, x + win_w, y + win_h,
                                           label, conf))

    elapsed_ms = (time.perf_counter() - t0) * 1000
    if detections:
        detections = nms(detections, nms_iou)
    return detections, n_proposals, elapsed_ms, heatmap


# ===================================================================
#  4. Contour Proposals
# ===================================================================

def contour_proposals(image, win_h, win_w, feature_fn, head,
                      conf_thresh, nms_iou,
                      canny_low=50, canny_high=150,
                      area_tolerance=3.0):
    """
    Generate candidate regions from image structure:
    Canny edges β†’ morphological closing β†’ contour extraction.
    Keep contours whose bounding-box area is within *area_tolerance*Γ—
    of the window area, centre a window on each, and score with the head.

    Returns an extra key ``edge_map`` in the heatmap slot for
    visualisation on the page (the caller can detect this).
    """
    H, W = image.shape[:2]
    heatmap = np.zeros((H, W), dtype=np.float32)
    detections = []
    n_proposals = 0
    t0 = time.perf_counter()

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, canny_low, canny_high)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)

    target_area = win_h * win_w
    min_area = target_area / area_tolerance
    max_area = target_area * area_tolerance

    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < min_area or area > max_area:
            continue
        bx, by, bw, bh = cv2.boundingRect(cnt)
        # Centre a window on the contour centre
        cx, cy = bx + bw // 2, by + bh // 2
        px = max(0, min(cx - win_w // 2, W - win_w))
        py = max(0, min(cy - win_h // 2, H - win_h))

        patch = image[py:py + win_h, px:px + win_w]
        if patch.shape[0] != win_h or patch.shape[1] != win_w:
            continue

        feats = feature_fn(patch)
        label, conf = head.predict(feats)
        n_proposals += 1

        if label != "background":
            heatmap[py:py + win_h, px:px + win_w] = np.maximum(
                heatmap[py:py + win_h, px:px + win_w], conf)
            if conf >= conf_thresh:
                detections.append((px, py, px + win_w, py + win_h,
                                   label, conf))

    elapsed_ms = (time.perf_counter() - t0) * 1000
    if detections:
        detections = nms(detections, nms_iou)
    return detections, n_proposals, elapsed_ms, heatmap, edges


# ===================================================================
#  5. Template Matching
# ===================================================================

def template_matching(image, template, conf_thresh, nms_iou,
                      method=cv2.TM_CCOEFF_NORMED):
    """
    OpenCV normalised cross-correlation.
    No trained head β€” pure pixel similarity between *template* and every
    image position.  Extremely fast (optimised C++) but not invariant to
    rotation, scale, or illumination.
    """
    H, W = image.shape[:2]
    th, tw = template.shape[:2]
    t0 = time.perf_counter()

    result = cv2.matchTemplate(image, template, method)

    if method in (cv2.TM_CCOEFF_NORMED, cv2.TM_CCORR_NORMED):
        score_map = np.clip(result, 0, 1).astype(np.float32)
    else:
        lo, hi = result.min(), result.max()
        score_map = ((result - lo) / (hi - lo + 1e-6)).astype(np.float32)

    # Full-size heatmap (resize for visualisation)
    heatmap = cv2.resize(score_map, (W, H), interpolation=cv2.INTER_LINEAR)

    # Extract detections above threshold
    detections = []
    locs = np.where(score_map >= conf_thresh)
    for y, x in zip(*locs):
        detections.append((int(x), int(y), int(x + tw), int(y + th),
                           "object", float(score_map[y, x])))

    n_proposals = score_map.shape[0] * score_map.shape[1]
    elapsed_ms = (time.perf_counter() - t0) * 1000

    if detections:
        detections = nms(detections, nms_iou)
    return detections, n_proposals, elapsed_ms, heatmap


# ===================================================================
#  Registry  β€”  metadata used by the Streamlit page
# ===================================================================

STRATEGIES = {
    "Exhaustive Sliding Window": {
        "icon": "πŸ”²",
        "fn":   exhaustive_sliding_window,
        "needs_head": True,
        "short": "Brute-force grid scan at every stride position.",
        "detail": (
            "The simplest approach: a fixed-size window slides across the "
            "**entire image** at regular intervals.  At every position the "
            "patch is extracted, features are computed, and the head classifies it.\n\n"
            "**Complexity:** $O\\!\\left(\\frac{W}{s} \\times \\frac{H}{s}\\right)$ "
            "where $s$ = stride.\n\n"
            "**Pro:** Guaranteed to evaluate every location β€” nothing is missed.\n\n"
            "**Con:** Extremely slow on large images or small strides."
        ),
    },
    "Image Pyramid": {
        "icon": "πŸ”Ί",
        "fn":   image_pyramid,
        "needs_head": True,
        "short": "Multi-scale resize + sliding window.",
        "detail": (
            "Builds a **Gaussian pyramid** by resizing the image to several "
            "scales (e.g. 50 %, 75 %, 100 %, 125 %, 150 %).  A sliding-window "
            "scan runs at each level and detections are mapped back to original "
            "coordinates.\n\n"
            "**Why:** The training crop has a fixed size.  If the real object "
            "appears larger or smaller in the scene, a single-scale scan will "
            "miss it.  The pyramid handles **scale variation**.\n\n"
            "**Cost:** Multiplies the number of proposals by the number of "
            "scales β€” slower than single-scale exhaustive."
        ),
    },
    "Coarse-to-Fine": {
        "icon": "🎯",
        "fn":   coarse_to_fine,
        "needs_head": True,
        "short": "Two-pass hierarchical refinement.",
        "detail": (
            "**Pass 1 β€” Coarse:** Scans the image with a large stride "
            "(coarse\\_factor Γ— fine\\_stride) using a relaxed confidence "
            "threshold (70 % of the target) to cheaply identify *hot regions*.\n\n"
            "**Pass 2 β€” Fine:** Re-scans **only** the neighbourhood around "
            "each coarse hit at the fine stride, within *refine\\_radius* steps "
            "in each direction.\n\n"
            "**Speedup:** Typically **3–10Γ—** faster than exhaustive when the "
            "object is spatially sparse (i.e. most of the image is background)."
        ),
    },
    "Contour Proposals": {
        "icon": "✏️",
        "fn":   contour_proposals,
        "needs_head": True,
        "short": "Edge-driven candidate regions scored by head.",
        "detail": (
            "Instead of scanning everywhere, this method lets **image "
            "structure** drive the search:\n\n"
            "1. Canny edge detection\n"
            "2. Morphological closing to bridge nearby edges\n"
            "3. External contour extraction\n"
            "4. Filter contours whose area falls within *area\\_tolerance* "
            "of the window area\n"
            "5. Centre a window on each surviving contour and score with "
            "the trained head\n\n"
            "**Proposals evaluated:** Typically 10–100Γ— fewer than exhaustive. "
            "Speed depends on scene complexity (more edges β†’ more proposals)."
        ),
    },
    "Template Matching": {
        "icon": "πŸ“‹",
        "fn":   template_matching,
        "needs_head": False,
        "short": "OpenCV cross-correlation β€” no head needed.",
        "detail": (
            "Classical **normalised cross-correlation** (NCC).  Slides the "
            "crop template over the image computing pixel-level similarity "
            "at every position.  No trained head is involved.\n\n"
            "**Speed:** Runs entirely in OpenCV's optimised C++ backend β€” "
            "orders of magnitude faster than Python-level loops.\n\n"
            "**Limitation:** Not invariant to rotation, scale, or illumination "
            "changes.  Works best when the object appears at the **exact same "
            "size and orientation** as the crop."
        ),
    },
}