Spaces:

kerojohan
/

cavemark

Sleeping

kerojohan

Align Space detector logic with main

f036731 about 1 month ago

13.9 kB

	"""
	app.py — CaveMark Gradio Space for Hugging Face
	Wraps detect_cave.py pipeline to work in-memory (no disk I/O).
	"""

	import cv2
	import numpy as np
	import gradio as gr

	from detect_cave import (
	preprocess_image,
	compute_valid_region,
	compute_ir_depth,
	generate_candidates,
	select_best_candidate,
	grabcut_refine,
	refine_mask,
	)


	# ──────────────────────────────────────────────────────────────────────────────
	# In-memory draw helpers (mirrors draw_result but returns numpy arrays)
	# ──────────────────────────────────────────────────────────────────────────────

	def _draw_result_arrays(gray_u8, refined_mask, scores,
	                        weight_map, profile_norm,
	                        all_candidates, all_scores):
	    """Render the four visualisation panels as in-memory RGB arrays.

	    All drawing is done on BGR copies of ``gray_u8`` (so the colour tuples
	    below are in BGR order) and each panel is converted to RGB on return.

	    Args:
	        gray_u8: 2-D single-channel image used as the background of every
	            panel; its shape is unpacked as ``(h, w)``.
	        refined_mask: Single-channel mask of the final detection; pixels
	            ``> 0`` are treated as foreground.
	        scores: Mapping for the selected candidate. Only ``"total"`` is
	            read here (0.0 is used when the key is missing).
	        weight_map: Array multiplied element-wise into each image channel
	            for the valid-region panel (presumably shaped ``(h, w)`` with
	            values in [0, 1] -- confirm against ``compute_valid_region``).
	        profile_norm: Sequence indexed by column ``0 .. w-1``; plotted as
	            a curve rising at most 59 px from the bottom edge.
	        all_candidates: Sequence of candidate masks.
	        all_scores: Sequence parallel to ``all_candidates``; each entry
	            must provide a ``"total"`` value.

	    Returns:
	        Tuple ``(result_rgb, mask_rgb, valid_rgb, cands_rgb)``.
	    """
	    h, w = gray_u8.shape

	    # ── Main result overlay ───────────────────────────────────────────────
	    vis = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)

	    # Tinted ring just outside the mask: dilate, then remove the mask itself.
	    dil_r = max(5, int(min(h, w) * 0.025))
	    dil_size = 2 * dil_r + 1  # odd kernel size centred on the pixel
	    dil_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dil_size, dil_size))
	    dil_mask = cv2.dilate(refined_mask, dil_k)
	    ring_mask = cv2.bitwise_and(dil_mask, cv2.bitwise_not(refined_mask))
	    ring_overlay = vis.copy()
	    ring_overlay[ring_mask > 0] = (30, 160, 255)
	    cv2.addWeighted(ring_overlay, 0.28, vis, 0.72, 0, vis)  # blend in place

	    # Semi-transparent fill over the mask itself.
	    overlay = vis.copy()
	    overlay[refined_mask > 0] = (100, 210, 60)
	    cv2.addWeighted(overlay, 0.35, vis, 0.65, 0, vis)

	    contours, _ = cv2.findContours(refined_mask, cv2.RETR_EXTERNAL,
	                                   cv2.CHAIN_APPROX_SIMPLE)
	    cv2.drawContours(vis, contours, -1, (0, 255, 80), 2)

	    score_val = scores.get("total", 0.0)
	    label = f"cave entrance score={score_val:.2f}"
	    if contours:
	        # Anchor the label just above the largest contour's bounding box,
	        # but never closer than 25 px to the top edge.
	        cnt = max(contours, key=cv2.contourArea)
	        x, y, _bw, _bh = cv2.boundingRect(cnt)
	        tx, ty = x + 5, max(y - 12, 25)
	    else:
	        tx, ty = 10, 30

	    fs = max(0.55, min(w, h) / 900)
	    th = max(1, int(fs * 2))
	    # Black, thicker copy offset by 2 px acts as a drop shadow for legibility.
	    cv2.putText(vis, label, (tx + 2, ty + 2), cv2.FONT_HERSHEY_SIMPLEX,
	                fs, (0, 0, 0), th + 2)
	    cv2.putText(vis, label, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX,
	                fs, (0, 255, 120), th)

	    result_rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)

	    # ── Mask ──────────────────────────────────────────────────────────────
	    mask_rgb = cv2.cvtColor(refined_mask, cv2.COLOR_GRAY2RGB)

	    # ── Debug: valid region ───────────────────────────────────────────────
	    dv = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
	    for ch in range(3):
	        c = dv[:, :, ch].astype(np.float32)
	        if ch == 2:
	            # Channel 2 is red in BGR: low-weight pixels are pushed to red.
	            c = c * weight_map + 180 * (1.0 - weight_map)
	        else:
	            c = c * weight_map
	        dv[:, :, ch] = np.clip(c, 0, 255).astype(np.uint8)
	    # Plot the per-column profile as a polyline along the bottom edge.
	    for col in range(w - 1):
	        y1 = h - 1 - int(profile_norm[col] * 59)
	        y2 = h - 1 - int(profile_norm[col + 1] * 59)
	        cv2.line(dv, (col, y1), (col + 1, y2), (0, 255, 255), 1)
	    cv2.putText(dv, "valid region (red=penalised)", (10, 25),
	                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
	    valid_rgb = cv2.cvtColor(dv, cv2.COLOR_BGR2RGB)

	    # ── Debug: candidates ─────────────────────────────────────────────────
	    dc = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2BGR)
	    colours = [(255, 80, 0), (0, 80, 255), (200, 0, 200), (0, 200, 200),
	               (200, 200, 0), (0, 160, 80), (128, 128, 255), (255, 128, 128)]
	    # Ascending by total score, so the best candidates are drawn last (on top).
	    indexed = sorted(range(len(all_candidates)),
	                     key=lambda i: all_scores[i]["total"])
	    for rank, i in enumerate(indexed):
	        colour = colours[i % len(colours)]
	        cl, _ = cv2.findContours(all_candidates[i], cv2.RETR_EXTERNAL,
	                                 cv2.CHAIN_APPROX_SIMPLE)
	        cv2.drawContours(dc, cl, -1, colour, 1)
	        # Only the five highest-scoring candidates get a numeric label.
	        if rank >= len(indexed) - 5 and cl:
	            c0 = max(cl, key=cv2.contourArea)
	            M = cv2.moments(c0)
	            if M["m00"] > 0:  # skip degenerate (zero-area) contours
	                cx_m = int(M["m10"] / M["m00"])
	                cy_m = int(M["m01"] / M["m00"])
	                cv2.putText(dc, f"{all_scores[i]['total']:.2f}", (cx_m, cy_m),
	                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, colour, 1)
	    # Final detection outline in white on top of all candidates.
	    cv2.drawContours(dc, contours, -1, (255, 255, 255), 2)
	    cv2.putText(dc,
	                f"{len(all_candidates)} candidates (white=best, {score_val:.2f})",
	                (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)
	    cands_rgb = cv2.cvtColor(dc, cv2.COLOR_BGR2RGB)

	    return result_rgb, mask_rgb, valid_rgb, cands_rgb


	# ──────────────────────────────────────────────────────────────────────────────
	# Full in-memory pipeline
	# ──────────────────────────────────────────────────────────────────────────────

	def _process_array(img_rgb: np.ndarray):
	    """Run the full CaveMark pipeline on a numpy RGB array.

	    Stages, in order: grayscale conversion, preprocessing, valid-region and
	    IR-depth maps, candidate generation and selection, an optional solidity
	    filter, an optional relaxed-threshold expansion, GrabCut refinement,
	    final mask refinement, and rendering of the visualisation panels.
	    The pipeline stages themselves are imported from ``detect_cave``.

	    Args:
	        img_rgb: Image array accepted by ``cv2.cvtColor(..., COLOR_RGB2GRAY)``.

	    Returns:
	        A 5-tuple ``(result_rgb, mask_rgb, valid_rgb, cands_rgb, info)``:
	        four RGB images followed by a one-line text summary. When no
	        candidates are produced, the first image is the grayscale input
	        and the other three are all-black.
	    """
	    gray_u8 = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
	    gray_f32 = gray_u8.astype(np.float32) / 255.0
	    h, w = gray_u8.shape

	    proc = preprocess_image(gray_u8, gray_f32)
	    # The last two values returned by compute_valid_region are not used here.
	    wmap, lc, rc, pn, _, _ = compute_valid_region(gray_f32)
	    depth_map = compute_ir_depth(gray_f32)

	    candidates = generate_candidates(proc, gray_f32, h, w, lc, rc)

	    if not candidates:
	        blank_rgb = cv2.cvtColor(np.zeros((h, w), np.uint8),
	                                 cv2.COLOR_GRAY2RGB)
	        vis_rgb = cv2.cvtColor(gray_u8, cv2.COLOR_GRAY2RGB)
	        info = "No cave entrance candidates found."
	        return vis_rgb, blank_rgb, blank_rgb, blank_rgb, info

	    best_mask, scores, all_sc = select_best_candidate(
	        candidates, gray_f32, wmap, lc, rc, depth_map=depth_map
	    )

	    # ── Solidity filter ───────────────────────────────────────────────────
	    # For a low-solidity selection, keep only its high-weight core and then
	    # pick a single connected component of that core.
	    best_count = np.count_nonzero(best_mask)
	    if scores.get("solidity", 1.0) < 0.65 and best_count > 100:
	        is_dark_void = scores.get("mean_inside", 1.0) < 0.15
	        mask_weights = wmap[best_mask > 0]
	        w_thresh = np.percentile(mask_weights, 50 if is_dark_void else 60)
	        high_w = ((best_mask > 0) & (wmap >= w_thresh)).astype(np.uint8) * 255
	        sk = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
	        high_w = cv2.morphologyEx(high_w, cv2.MORPH_CLOSE, sk)
	        high_w = cv2.morphologyEx(high_w, cv2.MORPH_OPEN, sk)
	        n_hw, labels_hw, stats_hw, centroids_hw = (
	            cv2.connectedComponentsWithStats(high_w, 8)
	        )
	        if n_hw > 1:  # label 0 is background; >1 means a component exists
	            # Components centred between lc and rc and holding at least
	            # 10% of the original mask area.
	            valid_comps = []
	            for ci in range(1, n_hw):
	                cx_ci = centroids_hw[ci, 0]
	                area_ci = stats_hw[ci, cv2.CC_STAT_AREA]
	                if lc <= cx_ci <= rc and area_ci >= best_count * 0.10:
	                    valid_comps.append((ci, area_ci))
	            if valid_comps:
	                best_ci = max(valid_comps, key=lambda x: x[1])[0]
	                best_mask = ((labels_hw == best_ci) * 255).astype(np.uint8)
	            else:
	                # Fall back to the largest component, but only if it keeps
	                # at least 15% of the original mask.
	                largest = 1 + np.argmax(stats_hw[1:, cv2.CC_STAT_AREA])
	                candidate_hw = ((labels_hw == largest) * 255).astype(np.uint8)
	                if np.count_nonzero(candidate_hw) >= best_count * 0.15:
	                    best_mask = candidate_hw

	    # ── Post-selection expansion ──────────────────────────────────────────
	    # For a small selection, try growing it into the dark regions (found at
	    # a relaxed threshold) that it already overlaps.
	    best_area_frac = np.count_nonzero(best_mask) / (h * w)
	    if best_area_frac < 0.25:
	        relax_pct = min(50, max(30, int(scores.get("area_frac", 0.1) * 100 * 4)))
	        relax_thr = int(np.percentile(proc["denoised"], relax_pct))
	        _, relax_dark = cv2.threshold(proc["denoised"], relax_thr, 255,
	                                      cv2.THRESH_BINARY_INV)
	        # "| 1" forces an odd kernel size.
	        br_size = max(9, int(min(h, w) * 0.02) | 1)
	        br_k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (br_size, br_size))
	        relax_dark = cv2.morphologyEx(relax_dark, cv2.MORPH_CLOSE, br_k)
	        _, labels_rd, _, _ = cv2.connectedComponentsWithStats(relax_dark, 8)
	        overlap_labels = set(np.unique(labels_rd[best_mask > 0])) - {0}
	        if overlap_labels:
	            expanded = np.zeros_like(best_mask)
	            for lb in overlap_labels:
	                expanded[labels_rd == lb] = 255
	            # Clip to the [lc, rc] column range unless a bound already sits
	            # within 5% of the image edge.
	            if lc > int(w * 0.05):
	                expanded[:, :lc] = 0
	            if rc < int(w * 0.95):
	                expanded[:, rc + 1:] = 0
	            n_exp, labels_exp, stats_exp, _ = (
	                cv2.connectedComponentsWithStats(expanded, 8)
	            )
	            if n_exp > 1:
	                # Keep only the largest connected piece.
	                largest_exp = 1 + np.argmax(stats_exp[1:, cv2.CC_STAT_AREA])
	                expanded = ((labels_exp == largest_exp) * 255).astype(np.uint8)
	            exp_area_frac = np.count_nonzero(expanded) / (h * w)
	            if exp_area_frac <= 0.40 and exp_area_frac > best_area_frac * 0.8:
	                exp_mean = float(gray_f32[expanded > 0].mean())
	                orig_mean = float(gray_f32[best_mask > 0].mean())
	                # argwhere yields (row, col) pairs, hence the (cy, cx) order.
	                orig_pts = np.argwhere(best_mask > 0).astype(np.float32)
	                exp_pts = np.argwhere(expanded > 0).astype(np.float32)
	                orig_cy_m, orig_cx_m = orig_pts.mean(axis=0)
	                exp_cy_m, exp_cx_m = exp_pts.mean(axis=0)
	                # Centroid displacement relative to the shorter image side.
	                centroid_shift = (
	                    np.sqrt((exp_cx_m - orig_cx_m) ** 2
	                            + (exp_cy_m - orig_cy_m) ** 2)
	                    / min(h, w)
	                )
	                # Accept only if not much brighter and not drifted far.
	                if exp_mean < orig_mean + 0.15 and centroid_shift <= 0.20:
	                    best_mask = expanded

	    # ── GrabCut ───────────────────────────────────────────────────────────
	    gc_result = grabcut_refine(gray_u8, best_mask, expand_ratio=2.0)
	    if np.count_nonzero(gc_result) > 0:  # ignore an empty GrabCut result
	        best_mask = gc_result

	    refined = refine_mask(best_mask, gray_f32)

	    # Same column clipping as applied to the expanded mask above.
	    if lc > int(w * 0.05):
	        refined[:, :lc] = 0
	    if rc < int(w * 0.95):
	        refined[:, rc + 1:] = 0

	    result_rgb, mask_rgb, valid_rgb, cands_rgb = _draw_result_arrays(
	        gray_u8, refined, scores, wmap, pn, candidates, all_sc
	    )

	    final_area = np.count_nonzero(refined) / (h * w)
	    info = (
	        f"Score: {scores['total']:.2f} | "
	        f"Area: {final_area*100:.1f}% | "
	        f"Contrast: {scores['contrast']:.2f} | "
	        f"IR depth: {scores['ir_depth']:.2f} | "
	        f"Darkness: {scores['dark']:.2f} | "
	        f"Texture mult: {scores['texture_mult']:.2f} | "
	        f"Candidates: {len(candidates)}"
	    )
	    return result_rgb, mask_rgb, valid_rgb, cands_rgb, info


	# ──────────────────────────────────────────────────────────────────────────────
	# Gradio interface
	# ──────────────────────────────────────────────────────────────────────────────

	def detect(image):
	    """Gradio callback: run the detector on an uploaded image.

	    Args:
	        image: Image passed in by the ``gr.Image`` input, or ``None`` when
	            nothing has been uploaded.

	    Returns:
	        A 5-tuple matching the callback outputs: four images followed by a
	        summary string. With no input, the images are ``None`` and the
	        summary is ``"No image provided."``.
	    """
	    if image is None:
	        return None, None, None, None, "No image provided."
	    return _process_array(image)


	# Build the Gradio UI. Components are laid out in the order they are created:
	# intro text, input row, summary line, two rows of output images, examples,
	# and a footer.
	with gr.Blocks(title="CaveMark — Cave Entrance Detector") as demo:
	    gr.Markdown(
	        """
	# CaveMark — Automatic Cave Entrance Detector

	Classical computer vision pipeline (OpenCV + NumPy) that locates cave entrances
	in IR/NIR monochrome imagery — no deep learning required.

	Upload an IR or NIR image from a trail camera, security camera or similar sensor.
	The pipeline runs: preprocess → valid-region → IR-depth → candidates → score →
	expand → GrabCut → refine → visualise.
	"""
	    )

	    # NOTE(review): the button is placed in the same row as the input image;
	    # confirm this matches the intended layout.
	    with gr.Row():
	        inp = gr.Image(label="Input image", type="numpy")
	        btn = gr.Button("Detect cave entrance", variant="primary")

	    info_box = gr.Markdown(label="Detection summary")

	    with gr.Row():
	        out_result = gr.Image(label="Result overlay")
	        out_mask = gr.Image(label="Binary mask")

	    with gr.Row():
	        out_valid = gr.Image(label="Valid-region weight map")
	        out_cands = gr.Image(label="Candidate scoring debug")

	    # Output order must match the 5-tuple returned by detect().
	    btn.click(
	        fn=detect,
	        inputs=inp,
	        outputs=[out_result, out_mask, out_valid, out_cands, info_box],
	    )

	    gr.Examples(
	        examples=[
	            ["examples/background.png"],
	            ["examples/background2.png"],
	            ["examples/background3.png"],
	            ["examples/background4.png"],
	            ["examples/background5.png"],
	            ["examples/background6.png"],
	            ["examples/background7.png"],
	            ["examples/background8.png"],
	        ],
	        inputs=inp,
	        outputs=[out_result, out_mask, out_valid, out_cands, info_box],
	        fn=detect,
	        cache_examples=False,
	    )

	    gr.Markdown(
	        """
	---
	How it works: [GitHub repo](https://github.com/kerojohan/cavemark) · MIT License
	"""
	    )

	if __name__ == "__main__":
	    demo.launch()