| |
"""
detect_cave.py — Automatic cave entrance detector for IR/NIR imagery.

Usage:
    python detect_cave.py                    # batch: all jpg/jpeg/png in this script's directory
    python detect_cave.py img1.png img2.png  # specific images

v4 — Improved pipeline (opencv + numpy only, no external models):
  - IR physics depth map (darkness × multi-scale local uniformity) as new signal
  - Texture gate: penalises textured rock/vegetation masquerading as voids
  - Vertical centroid gate: suppresses top-of-frame artefacts
  - GrabCut boundary refinement after candidate selection
  - Contour smoothing in refine_mask (wrap-around Gaussian)
  - Amber dilation ring in result visualisation
"""
|
|
| import cv2 |
| import numpy as np |
| import os |
| import sys |
| import glob |
|
|
|
|
| |
| |
| |
|
|
def load_image(path: str):
    """Load an image and return it as 8-bit grayscale plus a float copy.

    The file is decoded with ``cv2.IMREAD_UNCHANGED``, so the decoder may hand
    back 16-bit or floating-point data and an alpha channel.  Non-``uint8``
    data is rescaled to the 0..255 range before the final cast; a plain
    ``astype(np.uint8)`` would wrap 16-bit values modulo 256.

    Args:
        path: Path of the image file to read.

    Returns:
        ``(gray_u8, gray_f32)``: a 2-D ``uint8`` image and the same data as
        ``float32`` divided by 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path``.
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError(f"Cannot read: {path}")

    # Bring every pixel type onto a 0..255 scale.  uint8 input is left
    # untouched so the common case behaves exactly as before.
    if img.dtype == np.uint8:
        scaled = img
    elif np.issubdtype(img.dtype, np.integer):
        info = np.iinfo(img.dtype)
        span = float(info.max) - float(info.min)
        scaled = (img.astype(np.float32) - float(info.min)) * (255.0 / span)
    else:
        # NOTE(review): the value range of floating-point input is not known
        # here, so it is min-max normalised — confirm this suits the sensor.
        data = img.astype(np.float32)
        finite = np.isfinite(data)
        if finite.any():
            lo = float(data[finite].min())
            hi = float(data[finite].max())
        else:
            lo, hi = 0.0, 1.0
        span = hi - lo if hi > lo else 1.0
        data = np.nan_to_num(data, nan=lo, posinf=hi, neginf=lo)
        scaled = ((data - lo) * (255.0 / span)).astype(np.float32)

    # Collapse colour (and alpha) to a single luminance channel.
    if scaled.ndim == 3:
        channels = scaled.shape[2]
        if channels == 4:
            gray = cv2.cvtColor(scaled, cv2.COLOR_BGRA2GRAY)
        elif channels == 3:
            gray = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
        else:
            # e.g. gray + alpha: keep the first (intensity) channel only.
            gray = scaled[:, :, 0]
    else:
        gray = scaled

    if gray.dtype == np.uint8:
        gray_u8 = np.ascontiguousarray(gray)
    else:
        gray_u8 = np.clip(np.rint(gray), 0, 255).astype(np.uint8)

    gray_f32 = gray_u8.astype(np.float32) / 255.0
    return gray_u8, gray_f32
|
|
|
|
| |
| |
| |
|
|
def preprocess_image(gray_u8, gray_f32):
    """Denoise the image and flatten its large-scale illumination.

    Steps:
      1. 5x5 median filter on the 8-bit image.
      2. Background illumination estimate: Gaussian blur whose kernel side is
         about 40 % of the shorter image dimension, clipped to [10, 255] so
         the following division stays well-defined.
      3. Division of the denoised image by that background, rescaled by 170
         and clipped to 8-bit.

    Args:
        gray_u8: 2-D ``uint8`` grayscale image.
        gray_f32: Float version of the image (not used by this function).

    Returns:
        Dict with keys ``"denoised"``, ``"background"`` and ``"corrected_u8"``.
    """
    rows, cols = gray_u8.shape

    smoothed = cv2.medianBlur(gray_u8, 5)
    smoothed_f = smoothed.astype(np.float32)

    # Odd kernel side, never smaller than 3.
    kernel_side = max(3, int(min(rows, cols) * 0.40) | 1)
    illumination = cv2.GaussianBlur(smoothed_f, (kernel_side, kernel_side), 0)
    illumination = np.clip(illumination, 10.0, 255.0)

    ratio = smoothed_f / illumination
    ratio_u8 = np.clip(ratio * 170, 0, 255).astype(np.uint8)

    return {
        "denoised": smoothed,
        "background": illumination,
        "corrected_u8": ratio_u8,
    }
|
|
|
|
| |
| |
| |
|
|
def compute_valid_region(gray_f32):
    """Build a soft per-column weight map from the horizontal brightness profile.

    The profile is the 80th percentile of every column, Gaussian-smoothed and
    normalised by its maximum.  Columns left of the first / right of the last
    column reaching 45 % of the peak are ramped down towards 0.3, and every
    column is additionally scaled by its own relative brightness.

    Args:
        gray_f32: 2-D float image.

    Returns:
        ``(weight_map, left_col, right_col, profile_norm, actual_left_col,
        actual_right_col)`` where ``left_col`` / ``right_col`` are the detected
        columns limited to at most 30 % / at least 70 % of the width, and the
        ``actual_*`` values are the unlimited detections.
    """
    h, w = gray_f32.shape

    column_p80 = np.percentile(gray_f32, 80, axis=0).astype(np.float32)
    blur_len = max(5, int(w * 0.08) | 1)
    profile_smooth = cv2.GaussianBlur(
        column_p80.reshape(1, -1), (blur_len, 1), 0
    ).flatten()

    peak = max(profile_smooth.max(), 1e-6)
    profile_norm = profile_smooth / peak

    drop_thresh = 0.45
    # First and last column whose normalised brightness reaches the threshold;
    # fall back to the image edges when no column qualifies.
    actual_left_col = next(
        (c for c in range(w) if profile_norm[c] >= drop_thresh), 0
    )
    actual_right_col = next(
        (c for c in range(w - 1, -1, -1) if profile_norm[c] >= drop_thresh),
        w - 1,
    )

    # Never let the valid zone shrink past the central 40 % of the frame.
    left_col = min(actual_left_col, int(w * 0.30))
    right_col = max(actual_right_col, int(w * 0.70))

    # Linear ramps from 0.3 at the image edge up to 1.0 at the zone boundary.
    cols = np.arange(w)
    left_ramp = 0.3 + 0.7 * cols / max(left_col, 1)
    right_ramp = 0.3 + 0.7 * (w - 1 - cols) / max(w - 1 - right_col, 1)

    weight_row = np.ones(w, dtype=np.float32)
    on_right = cols > right_col
    on_left = cols < left_col
    weight_row[on_right] = right_ramp[on_right]
    weight_row[on_left] = left_ramp[on_left]

    weight_row *= np.clip(profile_norm / drop_thresh, 0.3, 1.0)
    weight_row = np.clip(weight_row, 0.0, 1.0)

    weight_map = np.tile(weight_row, (h, 1))
    return (weight_map, left_col, right_col, profile_norm,
            actual_left_col, actual_right_col)
|
|
|
|
| |
| |
| |
|
|
def compute_ir_depth(gray_f32):
    """Compute the IR "depth" map: darkness times local uniformity.

    For three Gaussian window sizes (15, 31, 61, each capped at roughly a
    third of the shorter image side) the local mean and standard deviation
    are estimated.  Uniformity is ``1 - std / (mean + 0.05)`` clipped to
    [0, 1]; it is multiplied by ``1 - gray`` and the three products are
    averaged.

    Args:
        gray_f32: 2-D float image, expected in [0, 1].

    Returns:
        ``float32`` map; larger values mark pixels that are both dark and
        locally uniform.
    """
    rows, cols = gray_f32.shape
    inv_brightness = 1.0 - gray_f32
    squared = gray_f32 * gray_f32
    kernel_cap = min(rows, cols) // 3

    def _single_scale(base_k):
        # Odd kernel side, at least 3.
        side = max(3, min(base_k, kernel_cap) | 1)
        local_mean = cv2.GaussianBlur(gray_f32, (side, side), 0)
        local_mean_sq = cv2.GaussianBlur(squared, (side, side), 0)
        # Var = E[x^2] - E[x]^2, clipped to absorb negative rounding noise.
        local_std = np.sqrt(
            np.clip(local_mean_sq - local_mean * local_mean, 0.0, None)
        )
        floor = np.clip(local_mean + 0.05, 0.05, None)
        flatness = 1.0 - np.clip(local_std / floor, 0.0, 1.0)
        return inv_brightness * flatness

    per_scale = [_single_scale(k) for k in (15, 31, 61)]
    return np.mean(per_scale, axis=0).astype(np.float32)
|
|
|
|
| |
| |
| |
|
|
def _extract_components(binary, min_area):
    """Return one 0/255 mask per connected component of at least ``min_area``.

    The binary image is first opened and then closed with a 7x7 ellipse;
    components are labelled with 8-connectivity.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)

    count, label_img, stats, _ = cv2.connectedComponentsWithStats(cleaned, 8)
    areas = stats[:, cv2.CC_STAT_AREA]
    # Label 0 is the background, so start at 1.
    return [
        ((label_img == idx) * 255).astype(np.uint8)
        for idx in range(1, count)
        if areas[idx] >= min_area
    ]
|
|
|
|
def _extract_components_heavy(binary, min_area, h, w):
    """Like ``_extract_components`` but with a large bridging close first.

    A closing with an ellipse of about 4 % of the shorter image side (at
    least 15 px) merges fragmented dark spots; a 7x7 opening then removes
    small residue before 8-connected components of at least ``min_area``
    pixels are returned as 0/255 masks.
    """
    bridge_side = max(15, int(min(h, w) * 0.04) | 1)
    bridge_kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (bridge_side, bridge_side)
    )
    merged = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, bridge_kernel)

    clean_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    merged = cv2.morphologyEx(merged, cv2.MORPH_OPEN, clean_kernel)

    count, label_img, stats, _ = cv2.connectedComponentsWithStats(merged, 8)
    areas = stats[:, cv2.CC_STAT_AREA]
    return [
        ((label_img == idx) * 255).astype(np.uint8)
        for idx in range(1, count)
        if areas[idx] >= min_area
    ]
|
|
|
|
def generate_candidates(proc, gray_f32, h, w, left_col=0, right_col=None):
    """Generate de-duplicated candidate masks with several strategies.

    Strategies, in the order their results are collected:
      A. Percentile thresholds on the denoised and on the
         illumination-corrected image, standard cleaning.
      B. Percentile thresholds on the denoised image, heavy bridging.
      C. Iterative growth of a seed made of the darkest pixels.
      D. Otsu threshold (standard and heavy extraction).
      E. Adaptive threshold intersected with "darker than the median".
      F. Percentile thresholds with columns outside
         ``[left_col, right_col]`` forced to white (only when that zone is
         more than 10 px away from an image edge).

    Args:
        proc: Dict from ``preprocess_image`` (``"denoised"`` and
            ``"corrected_u8"`` are read).
        gray_f32: Float image (not used by this function).
        h, w: Image height and width.
        left_col, right_col: Valid column range; ``right_col`` defaults to
            the last column.

    Returns:
        List of 0/255 ``uint8`` masks; a mask whose IoU with an earlier one
        exceeds 0.80 is dropped.
    """
    den = proc["denoised"]
    corr = proc["corrected_u8"]
    area_floor = int(h * w * 0.008)
    pool = []

    def _level(image, pct):
        return int(np.percentile(image, pct))

    def _darker_than(image, level):
        return cv2.threshold(image, level, 255, cv2.THRESH_BINARY_INV)[1]

    # --- A: multi-level thresholds, standard cleaning ---------------------
    for pct in (10, 15, 20, 25, 30, 35, 40):
        pool.extend(
            _extract_components(_darker_than(den, _level(den, pct)), area_floor)
        )
    for pct in (15, 25, 35, 45):
        pool.extend(
            _extract_components(_darker_than(corr, _level(corr, pct)), area_floor)
        )

    # --- B: multi-level thresholds, heavy bridging ------------------------
    for pct in (10, 15, 20, 25, 30, 35):
        pool.extend(
            _extract_components_heavy(
                _darker_than(den, _level(den, pct)), area_floor, h, w
            )
        )

    # --- C: seed growth from the darkest pixels ---------------------------
    seed_side = max(7, int(min(h, w) * 0.03) | 1)
    seed_kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (seed_side, seed_side)
    )
    grow_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))

    seed = _darker_than(den, max(_level(den, 1), 3))
    seed = cv2.morphologyEx(seed, cv2.MORPH_CLOSE, seed_kernel)
    for pct in (5, 10, 15, 20, 25, 30, 35, 40, 50):
        allowed = _darker_than(den, _level(den, pct))
        # Grow the seed, but only into pixels dark enough for this level.
        grown = cv2.dilate(seed, grow_kernel, iterations=2)
        grown = cv2.bitwise_and(grown, allowed)
        grown = cv2.morphologyEx(grown, cv2.MORPH_CLOSE, seed_kernel)
        seed = cv2.bitwise_or(seed, grown)
        pool.extend(_extract_components(grown, area_floor))

    # --- D: Otsu ----------------------------------------------------------
    _, otsu = cv2.threshold(den, 0, 255,
                            cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    pool.extend(_extract_components(otsu, area_floor))
    pool.extend(_extract_components_heavy(otsu, area_floor, h, w))

    # --- E: adaptive threshold ∩ darker-than-median -----------------------
    block_side = max(11, int(min(h, w) * 0.15) | 1)
    adaptive = cv2.adaptiveThreshold(
        den, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=block_side, C=10
    )
    below_median = _darker_than(den, int(np.median(den)))
    pool.extend(
        _extract_components_heavy(
            cv2.bitwise_and(adaptive, below_median), area_floor, h, w
        )
    )

    # --- F: valid-zone-only thresholds ------------------------------------
    if right_col is None:
        right_col = w - 1
    if left_col > 10 or right_col < w - 11:
        zone_only = den.copy()
        zone_only[:, :left_col] = 255
        zone_only[:, right_col + 1:] = 255
        for pct in (10, 15, 20, 25, 30, 35, 40):
            # The threshold level comes from the unmasked image.
            dark = _darker_than(zone_only, _level(den, pct))
            pool.extend(_extract_components(dark, area_floor))
            pool.extend(_extract_components_heavy(dark, area_floor, h, w))

    # --- De-duplicate by IoU, keeping the first occurrence ----------------
    kept = []
    kept_areas = []
    for cand in pool:
        cand_area = np.count_nonzero(cand)
        duplicate = False
        for ref, ref_area in zip(kept, kept_areas):
            inter = np.count_nonzero(cand & ref)
            union = cand_area + ref_area - inter
            if union > 0 and inter / union > 0.80:
                duplicate = True
                break
        if not duplicate:
            kept.append(cand)
            kept_areas.append(cand_area)

    return kept
|
|
|
|
| |
| |
| |
|
|
def score_candidate(mask, gray_f32, weight_map, left_col, right_col,
                    darkest5_mask, depth_map=None):
    """Score one candidate mask: weighted additive terms times gates.

    The additive part combines contrast against a surrounding ring,
    darkness, enrichment in the darkest pixels, distance-transform depth,
    IR depth, boundary gradient, valid-region weight, aspect and position.
    It is then multiplied by gates for area, solidity, interior texture,
    vertical position, lateral position and image-border contact.

    Args:
        mask: 0/255 ``uint8`` candidate mask.
        gray_f32: 2-D float image, expected in [0, 1].
        weight_map: Per-pixel valid-region weights.
        left_col, right_col: Valid column range.
        darkest5_mask: Mask of the darkest pixels of the image.
        depth_map: Optional IR depth map; when ``None`` half the darkness
            score is used in its place.

    Returns:
        Dict of rounded diagnostics with the final score under ``"total"``.
        Masks with fewer than 10 pixels or without contours yield
        ``{"total": -1.0}`` only.
    """
    h, w = gray_f32.shape
    img_area = h * w
    mask_bool = mask.astype(bool)
    area = int(mask_bool.sum())
    if area < 10:
        return {"total": -1.0}

    # --- Shape descriptors from the largest external contour --------------
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return {"total": -1.0}
    cnt = max(contours, key=cv2.contourArea)
    cnt_area = cv2.contourArea(cnt)
    hull_area = cv2.contourArea(cv2.convexHull(cnt))
    solidity = cnt_area / hull_area if hull_area > 0 else 0.0
    x, y, bw, bh = cv2.boundingRect(cnt)
    aspect = min(bw, bh) / max(bw, bh) if max(bw, bh) > 0 else 0.0
    area_frac = area / img_area

    # --- Interior statistics ----------------------------------------------
    vals_inside = gray_f32[mask_bool]
    mean_inside = float(vals_inside.mean())
    std_inside = float(vals_inside.std())
    darkness = 1.0 - mean_inside

    # --- Contrast against a surrounding ring ------------------------------
    ring_width = max(40, int(min(h, w) * 0.08))
    dil_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                      (ring_width, ring_width))
    dilated = cv2.dilate(mask, dil_k)
    ring = dilated.astype(bool) & (~mask_bool)
    if ring.sum() > 100:
        mean_outside = float(gray_f32[ring].mean())
    else:
        # Ring too small to be representative: compare with the whole image.
        mean_outside = float(gray_f32.mean())
    contrast = mean_outside - mean_inside
    contrast_score = np.clip(contrast / 0.25, 0.0, 1.0)

    # --- Absolute darkness ------------------------------------------------
    dark_score = np.clip(darkness / 0.7, 0.0, 1.0)

    # --- Enrichment: share of the darkest pixels vs. share of the area ----
    darkest5_bool = darkest5_mask.astype(bool)
    total_darkest = max(darkest5_bool.sum(), 1)
    contained_frac = float((mask_bool & darkest5_bool).sum()) / total_darkest
    enrichment = contained_frac / max(area_frac, 0.001)
    enrichment_score = np.clip((enrichment - 1.0) / 8.0, 0.0, 1.0)

    # --- Geometric depth: largest inscribed radius ------------------------
    dist_transform = cv2.distanceTransform(mask, cv2.DIST_L2, 5)
    max_dist = float(dist_transform.max())
    ref_dist = min(h, w) * 0.12
    depth_score = np.clip(max_dist / ref_dist, 0.0, 1.0)

    # --- IR physics depth (dark AND uniform) ------------------------------
    if depth_map is not None:
        ir_depth_score = float(
            np.clip(depth_map[mask_bool].mean() / 0.5, 0.0, 1.0)
        )
    else:
        ir_depth_score = dark_score * 0.5

    # --- Gradient magnitude along the mask boundary -----------------------
    grad_x = cv2.Sobel(gray_f32, cv2.CV_32F, 1, 0, ksize=5)
    grad_y = cv2.Sobel(gray_f32, cv2.CV_32F, 0, 1, ksize=5)
    grad_mag = np.sqrt(grad_x**2 + grad_y**2)
    thin_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    # dilate >= erode pixel-wise, so the uint8 subtraction cannot wrap.
    contour_ring = cv2.dilate(mask, thin_k) - cv2.erode(mask, thin_k)
    contour_bool = contour_ring.astype(bool)
    if contour_bool.sum() > 0:
        gradient_score = np.clip(float(grad_mag[contour_bool].mean()) / 0.10,
                                 0.0, 1.0)
    else:
        gradient_score = 0.0

    # --- Valid-region weight, aspect, position ----------------------------
    valid_score = float(weight_map[mask_bool].mean())

    aspect_score = 1.0 if aspect >= 0.15 else aspect / 0.15

    cx = x + bw / 2.0
    cy = y + bh / 2.0
    dist_x = abs(cx / w - 0.5) * 2
    dist_y = abs(cy / h - 0.5) * 2
    position_score = 1.0 - 0.10 * dist_x - 0.05 * dist_y

    additive = (
        0.24 * contrast_score
        + 0.14 * dark_score
        + 0.06 * enrichment_score
        + 0.12 * depth_score
        + 0.10 * ir_depth_score
        + 0.09 * gradient_score
        + 0.06 * valid_score
        + 0.03 * aspect_score
        + 0.04 * position_score
        + 0.12 * 1.0
    )

    # --- Area gate: full score only for 8 %..28 % of the image ------------
    if area_frac < 0.005:
        area_mult = 0.05
    elif area_frac < 0.02:
        area_mult = 0.05 + 0.20 * (area_frac - 0.005) / 0.015
    elif area_frac < 0.04:
        area_mult = 0.25 + 0.25 * (area_frac - 0.02) / 0.02
    elif area_frac < 0.08:
        area_mult = 0.50 + 0.50 * (area_frac - 0.04) / 0.04
    elif area_frac <= 0.28:
        area_mult = 1.0
    elif area_frac <= 0.45:
        area_mult = 1.0 - 0.80 * (area_frac - 0.28) / 0.17
    else:
        area_mult = max(0.05, 0.20 - 0.15 * (area_frac - 0.45) / 0.55)

    # --- Solidity gate ----------------------------------------------------
    if solidity >= 0.45:
        solidity_mult = 1.0
    elif solidity >= 0.25:
        solidity_mult = 0.4 + 0.6 * (solidity - 0.25) / 0.20
    else:
        solidity_mult = 0.4

    # --- Texture gate: penalise non-uniform interiors ---------------------
    if std_inside <= 0.10:
        texture_mult = 1.0
    elif std_inside <= 0.22:
        texture_mult = 1.0 - 0.40 * (std_inside - 0.10) / 0.12
    else:
        texture_mult = 0.60

    # --- Vertical gate: damp candidates centred in the top of the frame ---
    if cy / h < 0.25:
        vert_gate = 0.75
    elif cy / h < 0.35:
        vert_gate = 0.75 + 0.25 * (cy / h - 0.25) / 0.10
    else:
        vert_gate = 1.0

    # --- Lateral penalty --------------------------------------------------
    lateral_pen = 1.0
    if int(cx) < left_col or int(cx) > right_col:
        if valid_score < 0.5:
            lateral_pen = 0.4

    # --- Border gate ------------------------------------------------------
    # Count how many of the four image borders the mask touches.  Each flag
    # is converted to int before summing: adding NumPy booleans is a logical
    # OR, which would cap the count at 1 and make the 0.35 branch unreachable.
    bp = 3
    edge_hits = (
        (mask[:bp, :] > 0).any(),
        (mask[-bp:, :] > 0).any(),
        (mask[:, :bp] > 0).any(),
        (mask[:, -bp:] > 0).any(),
    )
    border_touches = sum(int(hit) for hit in edge_hits)
    if border_touches == 0:
        border_mult = 1.0
    elif border_touches == 1:
        border_mult = 0.60
    else:
        border_mult = 0.35

    total = (additive * area_mult * solidity_mult
             * texture_mult * vert_gate * lateral_pen * border_mult)

    return {
        "total": round(float(total), 4),
        "additive": round(float(additive), 3),
        "contrast": round(float(contrast_score), 3),
        "dark": round(float(dark_score), 3),
        "enrichment": round(float(enrichment_score), 3),
        "depth": round(float(depth_score), 3),
        "ir_depth": round(float(ir_depth_score), 3),
        "texture": round(float(std_inside), 3),
        "texture_mult": round(float(texture_mult), 3),
        "vert_gate": round(float(vert_gate), 3),
        "area_mult": round(float(area_mult), 3),
        "area_frac": round(float(area_frac), 4),
        "solidity": round(float(solidity), 3),
        "sol_mult": round(float(solidity_mult), 3),
        "gradient": round(float(gradient_score), 3),
        "valid_score": round(float(valid_score), 3),
        "mean_inside": round(float(mean_inside), 3),
        "mean_outside": round(float(mean_outside), 3),
        "border_touches": border_touches,
        "border_mult": round(float(border_mult), 3),
    }
|
|
|
|
| |
| |
| |
|
|
def select_best_candidate(candidates, gray_f32, weight_map,
                          left_col, right_col, depth_map=None):
    """Score every candidate and pick the one with the highest total.

    Args:
        candidates: List of 0/255 candidate masks.
        gray_f32: 2-D float image.
        weight_map: Per-pixel valid-region weights.
        left_col, right_col: Valid column range.
        depth_map: Optional IR depth map forwarded to ``score_candidate``.

    Returns:
        ``(best_mask, best_scores, all_scores)``; ``(None, {}, [])`` when
        there are no candidates.  Ties go to the earliest candidate.
    """
    if not candidates:
        return None, {}, []

    # Mask of the darkest 5 % of pixels, shared by all candidates.
    cutoff = np.percentile(gray_f32, 5)
    darkest5_mask = (gray_f32 <= cutoff).astype(np.uint8) * 255

    all_scores = [
        score_candidate(cand, gray_f32, weight_map, left_col, right_col,
                        darkest5_mask, depth_map=depth_map)
        for cand in candidates
    ]

    winner, winner_scores = max(enumerate(all_scores),
                                key=lambda pair: pair[1]["total"])
    return candidates[winner], winner_scores, all_scores
|
|
|
|
| |
| |
| |
|
|
def grabcut_refine(gray_u8, mask, conservative_mask=None, expand_ratio=2.5):
    """Refine a mask boundary with OpenCV GrabCut, initialised from a mask.

    Labelling used for the initial GrabCut mask:
      * everything starts as definite background;
      * the mask dilated by ~10 % of its shorter bounding-box side (limited
        to the expanded bounding box) becomes probable foreground;
      * definite foreground is ``conservative_mask`` when it has at least
        10 pixels, otherwise the mask eroded by ~8 %;
      * pixels inside the expanded bounding box but outside the mask dilated
        by ~20 % become probable background.

    The input ``mask`` is returned unchanged whenever refinement is not
    possible or not trustworthy: fewer than 200 mask pixels, a degenerate
    bounding box, fewer than 10 definite-foreground pixels, an OpenCV error
    inside GrabCut, or a result smaller than 25 % of the input area.

    Args:
        gray_u8: 2-D ``uint8`` grayscale image.
        mask: 0/255 ``uint8`` mask to refine.
        conservative_mask: Optional known-good core used as definite
            foreground.
        expand_ratio: Bounding-box growth factor defining the working
            rectangle.

    Returns:
        Refined 0/255 ``uint8`` mask, or ``mask`` itself on fallback.
    """
    h, w = gray_u8.shape
    area = np.count_nonzero(mask)
    if area < 200:
        return mask

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return mask
    cnt = max(contours, key=cv2.contourArea)
    x, y, bw, bh = cv2.boundingRect(cnt)
    if bw < 5 or bh < 5:
        return mask

    # Working rectangle: bounding box grown by expand_ratio, clipped to image.
    ex = int(bw * (expand_ratio - 1) / 2)
    ey = int(bh * (expand_ratio - 1) / 2)
    x1 = max(0, x - ex)
    y1 = max(0, y - ey)
    x2 = min(w, x + bw + ex)
    y2 = min(h, y + bh + ey)
    if x2 - x1 < 5 or y2 - y1 < 5:
        return mask

    gc_mask = np.full((h, w), cv2.GC_BGD, dtype=np.uint8)

    # Probable foreground: slightly dilated mask inside the rectangle.
    dil_r = max(5, int(min(bw, bh) * 0.10))
    dil_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                      (2 * dil_r + 1, 2 * dil_r + 1))
    prob_fg = cv2.dilate(mask, dil_k)
    prob_fg[:y1, :] = 0
    prob_fg[y2:, :] = 0
    prob_fg[:, :x1] = 0
    prob_fg[:, x2:] = 0
    gc_mask[prob_fg > 0] = cv2.GC_PR_FGD

    # Definite foreground: caller-supplied core or an eroded copy of the mask.
    if conservative_mask is not None and np.count_nonzero(conservative_mask) >= 10:
        gc_mask[conservative_mask > 0] = cv2.GC_FGD
    else:
        ero_r = max(3, int(min(bw, bh) * 0.08))
        ero_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                          (2 * ero_r + 1, 2 * ero_r + 1))
        core = cv2.erode(mask, ero_k)
        gc_mask[core > 0] = cv2.GC_FGD

    # Probable background: inside the rectangle but well away from the mask.
    far_r = max(7, int(min(bw, bh) * 0.20))
    far_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                      (2 * far_r + 1, 2 * far_r + 1))
    far_dil = cv2.dilate(mask, far_k)
    in_rect = np.zeros((h, w), np.uint8)
    in_rect[y1:y2, x1:x2] = 255
    prob_bg = cv2.bitwise_and(in_rect, cv2.bitwise_not(far_dil))
    gc_mask[prob_bg > 0] = cv2.GC_PR_BGD

    if (gc_mask == cv2.GC_FGD).sum() < 10:
        return mask

    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)
    rect = (x1, y1, x2 - x1, y2 - y1)
    vis3 = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)

    # Refinement is best-effort: only an OpenCV failure inside GrabCut falls
    # back to the input mask.  Other exceptions indicate programming errors
    # and are allowed to propagate.
    try:
        cv2.grabCut(vis3, gc_mask, rect, bgd_model, fgd_model,
                    3, cv2.GC_INIT_WITH_MASK)
    except cv2.error:
        return mask

    result = np.where(
        (gc_mask == cv2.GC_FGD) | (gc_mask == cv2.GC_PR_FGD),
        255, 0
    ).astype(np.uint8)

    min_keep = area * 0.25
    if np.count_nonzero(result) < min_keep:
        return mask

    # With several foreground components, keep the largest one that overlaps
    # the input mask.
    n_comp, labels, stats, _ = cv2.connectedComponentsWithStats(result, 8)
    if n_comp > 2:
        overlap_ids = np.unique(labels[mask > 0])
        overlap_ids = overlap_ids[overlap_ids != 0]
        if len(overlap_ids) > 0:
            keep_id = overlap_ids[
                np.argmax(stats[overlap_ids, cv2.CC_STAT_AREA])
            ]
            result = ((labels == keep_id) * 255).astype(np.uint8)

    if np.count_nonzero(result) < min_keep:
        return mask

    return result
|
|
|
|
| |
| |
| |
|
|
def refine_mask(mask, gray_f32):
    """Close gaps, fill holes, smooth the boundary, keep the largest blob.

    Steps: morphological close (~2 % of the shorter side, at least 11 px),
    flood-fill based hole filling, 9x9 close + open, largest 8-connected
    component, then wrap-around Gaussian smoothing of the outer contour
    (accepted only if at least 70 % of the area survives).

    The input ``mask`` is returned unchanged when the refined mask grows
    beyond ``max(2 * original area, 50 % of the image)`` or when refinement
    erases a non-empty mask entirely (e.g. a thin shape removed by the
    9x9 opening).

    Args:
        mask: 0/255 ``uint8`` mask.
        gray_f32: Image whose shape gives the frame size.

    Returns:
        Refined 0/255 ``uint8`` mask, or ``mask`` itself on fallback.
    """
    h, w = gray_f32.shape
    orig_area = np.count_nonzero(mask)

    cs = max(11, int(min(h, w) * 0.02) | 1)
    ck = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (cs, cs))
    refined = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, ck)

    # Hole filling: flood the background from a 1-px border added around the
    # mask; whatever the flood cannot reach is an interior hole.
    bordered = np.zeros((h + 2, w + 2), np.uint8)
    bordered[1:-1, 1:-1] = refined
    flood = bordered.copy()
    # floodFill requires a mask 2 px larger than the image it fills.
    pad = np.zeros((h + 4, w + 4), np.uint8)
    cv2.floodFill(flood, pad, (0, 0), 255)
    holes = cv2.bitwise_not(flood)[1:-1, 1:-1]
    refined = cv2.bitwise_or(refined, holes)

    sk = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    refined = cv2.morphologyEx(refined, cv2.MORPH_CLOSE, sk)
    refined = cv2.morphologyEx(refined, cv2.MORPH_OPEN, sk)

    # Keep only the largest connected component (label 0 is background).
    n, labels, stats, _ = cv2.connectedComponentsWithStats(refined, 8)
    if n > 1:
        largest = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        refined = ((labels == largest) * 255).astype(np.uint8)

    # Smooth the outer contour with a circular (wrap-around) Gaussian.
    cnts, _ = cv2.findContours(refined, cv2.RETR_EXTERNAL,
                               cv2.CHAIN_APPROX_NONE)
    if cnts:
        main_cnt = max(cnts, key=cv2.contourArea)
        pts = main_cnt.reshape(-1, 2).astype(np.float32)
        n_pts = len(pts)
        if n_pts > 30:
            sigma = min(15.0, max(4.0, n_pts / 120.0))
            ksize = max(3, int(6 * sigma) | 1)
            pad_n = ksize // 2
            # Pad with points from the opposite end so the closed contour is
            # filtered without a seam; 'valid' then yields n_pts samples.
            padded = np.concatenate([pts[-pad_n:], pts, pts[:pad_n]], axis=0)
            kernel = cv2.getGaussianKernel(ksize, sigma).flatten()
            sx = np.convolve(padded[:, 0], kernel, mode='valid')[:n_pts]
            sy = np.convolve(padded[:, 1], kernel, mode='valid')[:n_pts]
            sx = np.clip(sx, 0, w - 1)
            sy = np.clip(sy, 0, h - 1)
            smooth_cnt = (np.stack([sx, sy], axis=1)
                          .astype(np.int32).reshape(-1, 1, 2))
            smooth_mask = np.zeros_like(refined)
            cv2.fillPoly(smooth_mask, [smooth_cnt], 255)
            # Reject smoothing that shrinks the region too much.
            if np.count_nonzero(smooth_mask) >= np.count_nonzero(refined) * 0.70:
                refined = smooth_mask

    refined_area = np.count_nonzero(refined)

    # Refinement must not delete a detection outright.
    if orig_area > 0 and refined_area == 0:
        return mask

    # Nor may it balloon far beyond the input.
    if refined_area > max(orig_area * 2, h * w * 0.50):
        return mask

    return refined
|
|
|
|
| |
| |
| |
|
|
def draw_result(gray_u8, refined_mask, scores,
                out_path, mask_path, debug_valid_path,
                weight_map, profile_norm,
                debug_cands_path, all_candidates, all_scores):
    """Write the result overlay, the mask and two debug images to disk.

    Files written:
      * ``out_path`` — grayscale image with an amber ring around the mask,
        a green fill, the mask contour and a score label.
      * ``mask_path`` — the mask itself.
      * ``debug_valid_path`` — image modulated by ``weight_map`` (red where
        the weight is low) with the column profile plotted along the bottom.
      * ``debug_cands_path`` — every candidate contour, with the five
        highest totals labelled and the final contour in white.
    """
    h, w = gray_u8.shape
    font = cv2.FONT_HERSHEY_SIMPLEX

    # ---- Result overlay ---------------------------------------------------
    vis = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)

    ring_radius = max(5, int(min(h, w) * 0.025))
    ring_kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (2 * ring_radius + 1, 2 * ring_radius + 1)
    )
    grown = cv2.dilate(refined_mask, ring_kernel)
    halo = cv2.bitwise_and(grown, cv2.bitwise_not(refined_mask))
    layer = vis.copy()
    layer[halo > 0] = (30, 160, 255)
    cv2.addWeighted(layer, 0.28, vis, 0.72, 0, vis)

    layer = vis.copy()
    layer[refined_mask > 0] = (100, 210, 60)
    cv2.addWeighted(layer, 0.35, vis, 0.65, 0, vis)

    contours, _ = cv2.findContours(refined_mask, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(vis, contours, -1, (0, 255, 80), 2)

    score_val = scores.get("total", 0.0)
    label = f"cave entrance score={score_val:.2f}"
    tx, ty = 10, 30
    if contours:
        # Anchor the label just above the largest contour's bounding box.
        bx, by, _, _ = cv2.boundingRect(max(contours, key=cv2.contourArea))
        tx, ty = bx + 5, max(by - 12, 25)

    font_scale = max(0.55, min(w, h) / 900)
    stroke = max(1, int(font_scale * 2))
    # Black drop shadow first, then the coloured text on top.
    cv2.putText(vis, label, (tx + 2, ty + 2), font,
                font_scale, (0, 0, 0), stroke + 2)
    cv2.putText(vis, label, (tx, ty), font,
                font_scale, (0, 255, 120), stroke)

    cv2.imwrite(out_path, vis)
    cv2.imwrite(mask_path, refined_mask)

    # ---- Debug: valid region ---------------------------------------------
    base = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
    weighted = base.astype(np.float32) * weight_map[:, :, None]
    # Channel 2 (red in BGR) is lifted where the weight is low.
    weighted[:, :, 2] += 180 * (1.0 - weight_map)
    dv = np.clip(weighted, 0, 255).astype(np.uint8)

    plot_y = [h - 1 - int(profile_norm[col] * 59) for col in range(w)]
    for col in range(w - 1):
        cv2.line(dv, (col, plot_y[col]), (col + 1, plot_y[col + 1]),
                 (0, 255, 255), 1)
    cv2.putText(dv, "valid region (red=penalised)", (10, 25),
                font, 0.6, (255, 255, 0), 2)
    cv2.imwrite(debug_valid_path, dv)

    # ---- Debug: candidates -----------------------------------------------
    dc = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
    palette = [(255, 80, 0), (0, 80, 255), (200, 0, 200), (0, 200, 200),
               (200, 200, 0), (0, 160, 80), (128, 128, 255), (255, 128, 128)]
    # Draw in ascending score order so better candidates end up on top.
    order = sorted(range(len(all_candidates)),
                   key=lambda idx: all_scores[idx]["total"])
    first_labelled = len(order) - 5
    for rank, idx in enumerate(order):
        colour = palette[idx % len(palette)]
        outlines, _ = cv2.findContours(all_candidates[idx], cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(dc, outlines, -1, colour, 1)
        if rank < first_labelled or not outlines:
            continue
        moments = cv2.moments(max(outlines, key=cv2.contourArea))
        if moments["m00"] > 0:
            centre = (int(moments["m10"] / moments["m00"]),
                      int(moments["m01"] / moments["m00"]))
            total = all_scores[idx]["total"]
            cv2.putText(dc, f"{total:.2f}", centre, font, 0.4, colour, 1)
    cv2.drawContours(dc, contours, -1, (255, 255, 255), 2)
    cv2.putText(dc,
                f"{len(all_candidates)} candidates (white=best, {score_val:.2f})",
                (10, 25), font, 0.55, (255, 255, 255), 2)
    cv2.imwrite(debug_cands_path, dc)
|
|
|
|
| |
| |
| |
|
|
def process_image(input_path, output_dir):
    """Run the full detection pipeline on one image and write its outputs.

    Stages: load → preprocess → valid-region / IR-depth maps → candidate
    generation → scoring → optional split of low-solidity masks → optional
    expansion into the connected dark region → GrabCut → mask refinement →
    lateral clipping → drawing.

    Args:
        input_path: Image file to process.
        output_dir: Directory receiving ``<name>_result.png``,
            ``<name>_mask.png``, ``<name>_debug_valid.png`` and
            ``<name>_debug_candidates.png``.

    Returns:
        List of written file paths.  When nothing can be detected (no
        candidates, or no candidate could be scored) only the result and a
        blank mask are written and returned.
    """
    bn = os.path.splitext(os.path.basename(input_path))[0]
    out_r = os.path.join(output_dir, f"{bn}_result.png")
    out_m = os.path.join(output_dir, f"{bn}_mask.png")
    out_dv = os.path.join(output_dir, f"{bn}_debug_valid.png")
    out_dc = os.path.join(output_dir, f"{bn}_debug_candidates.png")

    gray_u8, gray_f32 = load_image(input_path)
    h, w = gray_u8.shape
    print(f" [{bn}] loaded {w}x{h}")

    proc = preprocess_image(gray_u8, gray_f32)
    wmap, lc, rc, pn, actual_lc, actual_rc = compute_valid_region(gray_f32)
    depth_map = compute_ir_depth(gray_f32)
    print(f" [{bn}] valid cols {lc}–{rc} (actual {actual_lc}–{actual_rc}, of {w})")

    candidates = generate_candidates(proc, gray_f32, h, w, lc, rc)
    print(f" [{bn}] {len(candidates)} unique candidates")

    def _write_blank_outputs():
        # No detection: blank mask plus the untouched image as "result".
        cv2.imwrite(out_m, np.zeros((h, w), np.uint8))
        cv2.imwrite(out_r, cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR))
        return [out_r, out_m]

    if not candidates:
        print(f" [{bn}] WARNING: no candidates")
        return _write_blank_outputs()

    best_mask, scores, all_sc = select_best_candidate(
        candidates, gray_f32, wmap, lc, rc, depth_map=depth_map
    )
    # score_candidate reports unusable masks as {"total": -1.0} without the
    # diagnostic keys printed below; treat that as "nothing detected" instead
    # of failing with a KeyError.
    if best_mask is None or scores.get("total", -1.0) < 0.0:
        print(f" [{bn}] WARNING: no scorable candidates")
        return _write_blank_outputs()

    print(f" [{bn}] best score {scores['total']:.3f} "
          f"area={scores['area_frac']*100:.1f}% "
          f"add={scores['additive']:.2f} "
          f"contrast={scores['contrast']:.2f} "
          f"texture={scores['texture']:.2f}(×{scores['texture_mult']:.2f}) "
          f"ir_depth={scores['ir_depth']:.2f} "
          f"depth={scores['depth']:.2f} "
          f"area_m={scores['area_mult']:.2f} "
          f"sol={scores['solidity']:.2f}(×{scores['sol_mult']:.2f}) "
          f"border={scores['border_touches']}(×{scores['border_mult']:.2f}) "
          f"in={scores['mean_inside']:.2f}±{scores['texture']:.2f} "
          f"out={scores['mean_outside']:.2f}")

    # ---- Low-solidity masks: keep only the well-weighted core ------------
    mask_area = np.count_nonzero(best_mask)
    if scores.get("solidity", 1.0) < 0.65 and mask_area > 100:
        _is_dark_void = scores.get("mean_inside", 1.0) < 0.15
        mask_weights = wmap[best_mask > 0]
        # Very dark voids keep the top half by weight, others the top 40 %.
        w_thresh = np.percentile(mask_weights, 50 if _is_dark_void else 60)
        high_w = ((best_mask > 0) & (wmap >= w_thresh)).astype(np.uint8) * 255
        sk = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
        high_w = cv2.morphologyEx(high_w, cv2.MORPH_CLOSE, sk)
        high_w = cv2.morphologyEx(high_w, cv2.MORPH_OPEN, sk)
        n_hw, labels_hw, stats_hw, centroids_hw = cv2.connectedComponentsWithStats(
            high_w, 8)
        if n_hw > 1:
            # Prefer components centred in the valid zone that hold at least
            # 10 % of the original mask.
            valid_comps = []
            for ci in range(1, n_hw):
                cx_ci = centroids_hw[ci, 0]
                area_ci = stats_hw[ci, cv2.CC_STAT_AREA]
                if lc <= cx_ci <= rc and area_ci >= mask_area * 0.10:
                    valid_comps.append((ci, area_ci))
            if valid_comps:
                best_ci = max(valid_comps, key=lambda x: x[1])[0]
                best_mask = ((labels_hw == best_ci) * 255).astype(np.uint8)
            else:
                largest = 1 + np.argmax(stats_hw[1:, cv2.CC_STAT_AREA])
                candidate_hw = ((labels_hw == largest) * 255).astype(np.uint8)
                if np.count_nonzero(candidate_hw) >= mask_area * 0.15:
                    best_mask = candidate_hw

    # ---- Small masks: try expanding into the connected dark region -------
    best_area_frac = np.count_nonzero(best_mask) / (h * w)
    if best_area_frac < 0.25:
        relax_pct = min(50, max(30, int(scores.get("area_frac", 0.1) * 100 * 4)))
        relax_thr = int(np.percentile(proc["denoised"], relax_pct))
        _, relax_dark = cv2.threshold(proc["denoised"], relax_thr, 255,
                                      cv2.THRESH_BINARY_INV)
        br_side = max(9, int(min(h, w) * 0.02) | 1)
        br_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (br_side, br_side))
        relax_dark = cv2.morphologyEx(relax_dark, cv2.MORPH_CLOSE, br_k)
        n_rd, labels_rd, _, _ = cv2.connectedComponentsWithStats(relax_dark, 8)
        overlap_labels = set(np.unique(labels_rd[best_mask > 0])) - {0}
        if overlap_labels:
            expanded = np.zeros_like(best_mask)
            for lb in overlap_labels:
                expanded[labels_rd == lb] = 255
            # Drop lateral columns outside the valid zone.
            if lc > int(w * 0.05):
                expanded[:, :lc] = 0
            if rc < int(w * 0.95):
                expanded[:, rc + 1:] = 0
            n_exp, labels_exp, stats_exp, _ = cv2.connectedComponentsWithStats(
                expanded, 8)
            if n_exp > 1:
                largest_exp = 1 + np.argmax(stats_exp[1:, cv2.CC_STAT_AREA])
                expanded = ((labels_exp == largest_exp) * 255).astype(np.uint8)
            exp_area_frac = np.count_nonzero(expanded) / (h * w)
            if exp_area_frac <= 0.40 and exp_area_frac > best_area_frac * 0.8:
                exp_mean = float(gray_f32[expanded > 0].mean())
                orig_mean = float(gray_f32[best_mask > 0].mean())
                # Accept only if the region stays dark and its centroid does
                # not drift by more than 20 % of the shorter image side.
                orig_pts = np.argwhere(best_mask > 0).astype(np.float32)
                exp_pts = np.argwhere(expanded > 0).astype(np.float32)
                orig_cy_m, orig_cx_m = orig_pts.mean(axis=0)
                exp_cy_m, exp_cx_m = exp_pts.mean(axis=0)
                centroid_shift = (np.sqrt((exp_cx_m - orig_cx_m)**2
                                          + (exp_cy_m - orig_cy_m)**2)
                                  / min(h, w))
                if exp_mean < orig_mean + 0.15 and centroid_shift <= 0.20:
                    print(f" [{bn}] expanded {best_area_frac*100:.1f}% → "
                          f"{exp_area_frac*100:.1f}%")
                    best_mask = expanded

    # ---- GrabCut boundary refinement -------------------------------------
    pre_gc = np.count_nonzero(best_mask) / (h * w)
    gc_result = grabcut_refine(gray_u8, best_mask, expand_ratio=2.0)
    post_gc = np.count_nonzero(gc_result) / (h * w)
    if post_gc > 0:
        print(f" [{bn}] grabcut {pre_gc*100:.1f}% → {post_gc*100:.1f}%")
        best_mask = gc_result

    refined = refine_mask(best_mask, gray_f32)

    # Refinement may have grown back into the lateral zones: clip again.
    if lc > int(w * 0.05):
        refined[:, :lc] = 0
    if rc < int(w * 0.95):
        refined[:, rc + 1:] = 0

    draw_result(gray_u8, refined, scores,
                out_r, out_m, out_dv,
                wmap, pn,
                out_dc, candidates, all_sc)

    final_area = np.count_nonzero(refined) / (h * w)
    print(f" [{bn}] final area {final_area*100:.1f}%")
    outputs = [out_r, out_m, out_dv, out_dc]
    for p in outputs:
        print(f" [{bn}] saved: {os.path.basename(p)}")
    return outputs
|
|
|
|
| |
| |
| |
|
|
def main():
    """Command-line entry point.

    With arguments, each path is processed and its outputs are written next
    to that image.  Without arguments, every jpg/jpeg/png found in the
    directory containing this script is processed (files that are
    themselves pipeline outputs are skipped) and outputs go to that same
    directory.  Exits with status 1 when batch mode finds no input.
    """
    cli_paths = sys.argv[1:]
    if cli_paths:
        # Explicit image list.
        for img_path in cli_paths:
            out_dir = os.path.dirname(os.path.abspath(img_path)) or "."
            print(f"Processing: {os.path.basename(img_path)}")
            process_image(img_path, out_dir)
            print()
        return

    # Batch mode: scan the directory this script lives in.
    cwd = os.path.dirname(os.path.abspath(__file__))
    patterns = ["*.jpg", "*.jpeg", "*.png", "*.JPG", "*.JPEG", "*.PNG"]
    found = [
        hit
        for pat in patterns
        for hit in glob.glob(os.path.join(cwd, pat))
    ]
    # Skip files produced by an earlier run; the set removes double matches.
    suffixes = ("_result.png", "_mask.png", "_debug_valid.png",
                "_debug_candidates.png")
    inputs = sorted({
        f for f in found
        if not os.path.basename(f).endswith(suffixes)
    })
    if not inputs:
        print("No input images found.")
        sys.exit(1)

    print(f"Found {len(inputs)} input image(s):")
    for p in inputs:
        print(f" {os.path.basename(p)}")
    print()

    all_out = []
    for img in inputs:
        print(f"Processing: {os.path.basename(img)}")
        all_out.extend(process_image(img, cwd))
        print()

    print("─" * 60)
    print(f"Done. {len(all_out)} output files:")
    for p in all_out:
        print(f" {os.path.basename(p)}")


if __name__ == "__main__":
    main()
|
|