Spaces:

OmarAlasqa
/

GeoViG-Demo

Sleeping

App Files Files Community

GeoViG-Demo / app.py

OmarAlasqa

Show BBox for medical

ba7be42 3 months ago

raw

history blame contribute delete

13.8 kB

	"""
	GeoViG Multi-Task Gradio Demo
	"""
	import sys, os, json, urllib.request
	import torch
	import torch.nn.functional as F
	from torchvision import transforms
	from PIL import Image, ImageDraw
	from huggingface_hub import hf_hub_download
	import numpy as np
	import gradio as gr

	# Startup diagnostics: print the interpreter and PyTorch versions to stdout.
	print(f"DEBUG: System Python: {sys.version}")
	print(f"DEBUG: PyTorch version: {torch.__version__}")

	# ── Gradio Theme ──────────────────────────────────────────────────────────────
	# Soft theme with an indigo primary hue; handed to gr.Blocks(theme=...) in the UI section.
	_THEME = gr.themes.Soft(primary_hue="indigo")

	from geovig import geovig_ti, geovig_s, geovig_m, geovig_b

	# ── Paths ──────────────────────────────────────────────────────────────────────
	# Hub repository that hosts every checkpoint used by this demo.
	REPO_ID = "OmarAlasqa/GeoViG"

	# Checkpoint file paths inside REPO_ID, keyed by the names the loaders pass
	# to get_ckpt_path().
	CKPT_FILES = {
	    # Classification variants (keys match the UI radio choices).
	    "GeoViG-Ti": "pth/geovig_ti_5e4_8G_300_75_22/checkpoint.pth",
	    "GeoViG-S": "pth/geovig_s_5e4_8G_300_77_48/checkpoint.pth",
	    "GeoViG-M": "pth/geovig_m_5e4_8G_300_80_70/checkpoint.pth",
	    "GeoViG-B": "pth/geovig_b_5e4_8G_300_82_38/checkpoint.pth",
	    # Detection / segmentation checkpoints.
	    "det_m": "coco_det_seg_pth/geovig_m_det_seg/epoch_12.pth",
	    "det_b": "coco_det_seg_pth/geovig_b_det_seg/epoch_12.pth",
	    # Medical checkpoints.
	    "kvasir_m": "medical/kvasir_geovig_m/checkpoint.pth",
	    "dsb_m": "medical/dsb_geovig_m/checkpoint.pth",
	}

	# Set before any hub download is attempted.
	os.environ.update({"HF_HUB_DISABLE_INTERACTIVE_FLOW": "1"})

	def get_ckpt_path(key):
	    """Resolve checkpoint *key* to a local file path via ``hf_hub_download``.

	    Args:
	        key: An entry name in ``CKPT_FILES``.

	    Returns:
	        Whatever ``hf_hub_download`` returns for the mapped file, or ``None``
	        if the lookup or the download raised (the error is printed).
	    """
	    local_path = None
	    try:
	        local_path = hf_hub_download(repo_id=REPO_ID, filename=CKPT_FILES[key])
	    except Exception as e:
	        # Best effort: callers treat None as "weights unavailable".
	        print(f"Error downloading {key}: {e}")
	    return local_path

	# ── ImageNet labels ────────────────────────────────────────────────────────────
	LABELS_FILE = "imagenet_labels.json"

	# Best-effort fetch of the human-readable ImageNet label list. A failure is
	# reported but not fatal: classify() falls back to "Class <idx>" labels.
	if not os.path.exists(LABELS_FILE):
	    try:
	        urllib.request.urlretrieve(
	            "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json",
	            LABELS_FILE)
	    except Exception as e:
	        # `Exception` rather than a bare except, so Ctrl-C / SystemExit still
	        # propagate while network and HTTP errors are logged and skipped.
	        print(f"DEBUG: Could not download ImageNet labels: {e}")

	IMAGENET_LABELS = []
	if os.path.exists(LABELS_FILE):
	    try:
	        with open(LABELS_FILE, encoding="utf-8") as f:
	            IMAGENET_LABELS = json.load(f)
	    except (OSError, ValueError) as e:
	        # json.JSONDecodeError is a ValueError; an unreadable or truncated
	        # file must not take the whole app down at import time.
	        print(f"DEBUG: Could not read {LABELS_FILE}: {e}")
	        IMAGENET_LABELS = []

	# ── Pre-processing ─────────────────────────────────────────────────────────────
	# Per-channel normalisation constants used by the classification transform.
	_IMAGENET_MEAN = [0.485, 0.456, 0.406]
	_IMAGENET_STD = [0.229, 0.224, 0.225]

	# Classification input pipeline: bicubic resize to 256, centre-crop to 224,
	# convert to tensor, then normalise.
	IMAGENET_TF = transforms.Compose(
	    [
	        transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),
	        transforms.CenterCrop(224),
	        transforms.ToTensor(),
	        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
	    ]
	)

	# ── Model cache ────────────────────────────────────────────────────────────────
	# Loaded classification models, keyed by variant name (filled by _load_cls_model).
	MODEL_CACHE = {}

	# Variant name -> constructor imported from the geovig module.
	BUILDERS = {
	    "GeoViG-Ti": geovig_ti,
	    "GeoViG-S": geovig_s,
	    "GeoViG-M": geovig_m,
	    "GeoViG-B": geovig_b,
	}

	def _load_cls_model(variant: str):
	    """Return the classification model for *variant*, loading it on first use.

	    Args:
	        variant: A key of ``BUILDERS`` (and of ``CKPT_FILES``).

	    Returns:
	        The model in eval mode, or ``None`` when ``get_ckpt_path`` could not
	        provide a checkpoint path. Successful loads are memoised in
	        ``MODEL_CACHE``.

	    Raises:
	        KeyError: If *variant* is not a key of ``BUILDERS``.
	    """
	    cached = MODEL_CACHE.get(variant)
	    if cached is not None:
	        return cached

	    # Look the builder up first (unknown variants still raise KeyError), but
	    # only construct the network once the checkpoint is known to be available.
	    builder = BUILDERS[variant]
	    path = get_ckpt_path(variant)
	    if not path:
	        return None
	    model = builder()

	    # NOTE(security): torch.load unpickles the file; only safe because the
	    # checkpoint comes from the project's own hub repository.
	    ckpt = torch.load(path, map_location="cpu")
	    # Accept {"model": sd}, {"state_dict": sd}, or a bare state dict.
	    if isinstance(ckpt, dict):
	        state = ckpt.get("model", ckpt.get("state_dict", ckpt))
	    else:
	        state = ckpt

	    # strict=False tolerates key mismatches; report them so a wrong checkpoint
	    # does not silently produce a partly-initialised model.
	    report = model.load_state_dict(state, strict=False)
	    if report.missing_keys or report.unexpected_keys:
	        print(
	            f"DEBUG: {variant}: {len(report.missing_keys)} missing / "
	            f"{len(report.unexpected_keys)} unexpected state_dict keys"
	        )

	    model.eval()
	    MODEL_CACHE[variant] = model
	    return model

	def classify(image: Image.Image, variant: str):
	    """Classify *image* with the chosen GeoViG variant.

	    Args:
	        image: Input PIL image, or ``None`` when nothing was uploaded.
	        variant: Model name understood by ``_load_cls_model``.

	    Returns:
	        A ``{label: probability}`` dict with up to five entries, ``{}`` when
	        *image* is ``None``, or a single error entry when the weights could
	        not be loaded.
	    """
	    if image is None:
	        return {}
	    model = _load_cls_model(variant)
	    # Explicit None check: do not rely on the truthiness of a module object.
	    if model is None:
	        return {"Error: Weights not found": 1.0}

	    x = IMAGENET_TF(image.convert("RGB")).unsqueeze(0)
	    with torch.no_grad():
	        probs = F.softmax(model(x), dim=-1)[0]

	    # Never ask for more entries than the head has classes.
	    top_probs, top_idxs = probs.topk(min(5, probs.numel()))

	    scores = {}
	    # .tolist() yields plain Python numbers, so the fallback label reads
	    # "Class 5" instead of "Class tensor(5)".
	    for prob, idx in zip(top_probs.tolist(), top_idxs.tolist()):
	        label = IMAGENET_LABELS[idx] if idx < len(IMAGENET_LABELS) else f"Class {idx}"
	        scores[label] = prob
	    return scores

	# ── Detection Logic ───────────────────────────────────────────────────────────
	# Class names indexed by detector class id; order matters.
	COCO_NAMES = [
	    # people and vehicles
	    "person",
	    "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
	    # street objects
	    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
	    # animals
	    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
	    # carried items
	    "backpack", "umbrella", "handbag", "tie", "suitcase",
	    # sports gear
	    "frisbee", "skis", "snowboard", "sports ball", "kite",
	    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
	    # tableware
	    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
	    # food
	    "banana", "apple", "sandwich", "orange", "broccoli",
	    "carrot", "hot dog", "pizza", "donut", "cake",
	    # furniture
	    "chair", "couch", "potted plant", "bed", "dining table", "toilet",
	    # electronics
	    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
	    # appliances
	    "microwave", "oven", "toaster", "sink", "refrigerator",
	    # household items
	    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
	]

	def _draw_detections(image: Image.Image, result, score_thr=0.3):
	    """Render detector output as a box image and a mask-overlay image.

	    Args:
	        image: Source PIL image.
	        result: Either a per-class sequence of ``(n, 5)`` box arrays
	            ``[x1, y1, x2, y2, score]``, or a ``(bbox_result, segm_result)``
	            pair whose second item holds per-class masks.
	        score_thr: Detections scoring below this value are skipped.

	    Returns:
	        ``(boxes_image, masks_image)``, both RGB PIL images. Boxes and labels
	        go on the first; masks are blended at 50 % into the second.
	    """
	    img_draw = image.convert("RGBA")
	    draw = ImageDraw.Draw(img_draw)
	    seg_np_img = np.array(image.convert("RGB"))

	    # A (bbox, segm) pair is a 2-tuple, or a 2-list whose first item is itself
	    # the per-class list. A plain 2-class bbox result (a list of two ndarrays)
	    # must NOT be mistaken for such a pair.
	    is_pair = len(result) == 2 and (
	        isinstance(result, tuple)
	        or (isinstance(result, list) and isinstance(result[0], list))
	    ) if isinstance(result, (list, tuple)) else False
	    if is_pair:
	        bbox_result, segm_result = result
	    else:
	        bbox_result, segm_result = result, None

	    count = 0
	    for cls_id, bboxes in enumerate(bbox_result if bbox_result is not None else ()):
	        if len(bboxes) == 0:
	            continue

	        # One random bright colour per class, shared by its boxes and masks.
	        color = tuple(np.random.randint(100, 255, 3).tolist())
	        tint = np.array(color, dtype=np.uint8)

	        cls_masks = None
	        if segm_result is not None and len(segm_result) > cls_id:
	            cls_masks = segm_result[cls_id]

	        for i, bbox in enumerate(bboxes):
	            if bbox[4] < score_thr:
	                continue
	            count += 1
	            x1, y1, x2, y2, score = bbox
	            draw.rectangle([x1, y1, x2, y2], outline=color, width=4)
	            if cls_id < len(COCO_NAMES):
	                label = f"{COCO_NAMES[cls_id]} {score:.2f}"
	            else:
	                label = f"cls {cls_id} {score:.2f}"
	            draw.text((x1 + 2, y1 + 2), label, fill=(255, 255, 0))

	            # No mask available for this detection: nothing to blend.
	            if cls_masks is None or i >= len(cls_masks):
	                continue
	            try:
	                mask = cls_masks[i]
	                if isinstance(mask, dict):
	                    # RLE-encoded mask; decode lazily so pycocotools is only
	                    # needed when such masks actually occur.
	                    import pycocotools.mask as mask_util
	                    mask = mask_util.decode(mask)
	                mask_bool = np.asarray(mask).astype(bool)
	                seg_np_img[mask_bool] = (seg_np_img[mask_bool] * 0.5 + tint * 0.5).astype(np.uint8)
	            except Exception as e:
	                # A single bad mask must not abort the whole rendering.
	                print(f"DEBUG: Mask blend failed: {e}")

	    print(f"DEBUG: Total detections drawn: {count}")
	    return img_draw.convert("RGB"), Image.fromarray(seg_np_img)

	# Initialised detectors, keyed by "det_<size>".
	DET_MODEL_CACHE = {}


	def detect(image: Image.Image, model_size: str, score_thr: float):
	    """Run detection + segmentation on *image* and return the two renderings.

	    Args:
	        image: Input PIL image, or ``None``.
	        model_size: ``"M"`` or ``"B"``; selects the config and checkpoint.
	        score_thr: Score threshold forwarded to ``_draw_detections``.

	    Returns:
	        ``(None, None)`` when *image* is ``None``; ``(image, image)`` when the
	        detection stack cannot be imported or the checkpoint is unavailable;
	        otherwise the pair returned by ``_draw_detections``.
	    """
	    if image is None:
	        return None, None

	    # The detection stack is imported lazily; any failure degrades to echoing
	    # the input image back.
	    try:
	        import mmcv
	        print(f"DEBUG: MMCV version: {mmcv.__version__}")
	        import geovig_det_backbone  # presumably registers the backbone — TODO confirm
	        from mmdet.apis import init_detector, inference_detector
	        import cv2
	    except Exception as e:
	        print(f"DEBUG: Detection setup failed: {e}")
	        return image, image

	    key = f"det_{model_size.lower()}"
	    if key not in DET_MODEL_CACHE:
	        config_by_size = {
	            "M": "configs/mask_rcnn_geovig_m_fpn_1x_coco.py",
	            "B": "configs/mask_rcnn_geovig_b_fpn_1x_coco.py",
	        }
	        cfg = config_by_size[model_size]
	        path = get_ckpt_path(key)
	        if not path:
	            return image, image
	        DET_MODEL_CACHE[key] = init_detector(cfg, path, device="cpu")
	    detector = DET_MODEL_CACHE[key]

	    # The detector is fed a BGR array converted from the RGB PIL image.
	    rgb_pixels = np.array(image.convert("RGB"))
	    cv_img = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
	    result = inference_detector(detector, cv_img)
	    return _draw_detections(image, result, score_thr=score_thr)

	# ── Medical logic ─────────────────────────────────────────────────────────────
	# Initialised medical detectors, keyed by "med_<dataset>".
	MED_MODEL_CACHE = {}


	def segment_medical(image: Image.Image, dataset: str, score_thr: float):
	    """Segment *image* with the medical model for *dataset* and draw the result.

	    Args:
	        image: Input PIL image, or ``None``.
	        dataset: ``"Kvasir-SEG"`` or ``"DSB 2018"``; selects config and weights.
	        score_thr: Detections scoring below this value are ignored.

	    Returns:
	        ``None`` when *image* is ``None``; the unchanged *image* when the
	        detection stack or the checkpoint is unavailable; otherwise an RGB PIL
	        image with the merged mask tinted and boxes plus scores drawn on top.
	    """
	    if image is None:
	        return None
	    try:
	        import geovig_det_backbone  # presumably registers the backbone — TODO confirm
	        from mmdet.apis import init_detector, inference_detector
	        import cv2
	    except Exception as e:
	        print(f"DEBUG: Medical setup failed: {e}")
	        return image

	    key = f"med_{dataset.lower().replace(' ', '_')}"
	    if key not in MED_MODEL_CACHE:
	        cfg = {"Kvasir-SEG": "configs/kvasir/mask_rcnn_geovig_m_fpn_1x_kvasir.py", "DSB 2018": "configs/dsb/mask_rcnn_geovig_m_fpn_1x_dsb.py"}[dataset]
	        v_key = "kvasir_m" if dataset == "Kvasir-SEG" else "dsb_m"
	        path = get_ckpt_path(v_key)
	        if not path:
	            return image
	        MED_MODEL_CACHE[key] = init_detector(cfg, path, device="cpu")

	    rgb = np.array(image.convert("RGB"))
	    cv_img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
	    result = inference_detector(MED_MODEL_CACHE[key], cv_img)

	    # Only a 2-tuple, or a 2-list whose first item is itself the per-class
	    # list, is a (bbox, segm) pair. A bbox-only result that happens to have
	    # two classes must stay a bbox result.
	    is_pair = (
	        isinstance(result, (list, tuple))
	        and len(result) == 2
	        and (isinstance(result, tuple) or isinstance(result[0], list))
	    )
	    if is_pair:
	        bbox_result, segm_result = result
	    else:
	        bbox_result, segm_result = result, None

	    # Union of every mask whose matching box clears the threshold.
	    combined_mask = np.zeros(cv_img.shape[:2], dtype=bool)
	    if segm_result is not None:
	        for cls_id, cls_masks in enumerate(segm_result):
	            cls_boxes = bbox_result[cls_id]
	            for i, mask in enumerate(cls_masks):
	                if i < len(cls_boxes) and cls_boxes[i, 4] >= score_thr:
	                    if isinstance(mask, dict):
	                        # RLE-encoded mask; decode lazily.
	                        import pycocotools.mask as mask_util
	                        mask = mask_util.decode(mask)
	                    combined_mask |= np.asarray(mask).astype(np.float32) > 0.5

	    # Blend only the masked pixels (40 % image, 60 % tint). Pixels outside the
	    # mask are copied untouched rather than round-tripped through float
	    # arithmetic and uint8 truncation.
	    tint = np.array([0, 200, 150], dtype=float)
	    blended = rgb.copy()
	    blended[combined_mask] = (rgb[combined_mask].astype(float) * 0.4 + tint * 0.6).astype(np.uint8)

	    img_pil = Image.fromarray(blended)
	    draw = ImageDraw.Draw(img_pil)

	    if bbox_result is not None:
	        for bboxes in bbox_result:
	            if len(bboxes) == 0:
	                continue
	            for bbox in bboxes:
	                if bbox[4] < score_thr:
	                    continue
	                x1, y1, x2, y2, score = bbox
	                draw.rectangle([x1, y1, x2, y2], outline=(0, 255, 255), width=3)
	                label = f"{score:.2f}"
	                draw.text((x1 + 2, y1 + 2), label, fill=(255, 255, 0))

	    return img_pil

	# ── UI ────────────────────────────────────────────────────────────────────────
	# Top-level Gradio layout: one Blocks app with three tabs, each wiring a
	# button to one of the inference functions defined above.
	# NOTE(review): indentation was lost in this copy, so the nesting of the
	# Blocks/Tabs/TabItem/Row/Column contexts cannot be read off the text —
	# restore it from the original file before running.
	with gr.Blocks(title="GeoViG Full Demo", theme=_THEME) as demo:
	gr.HTML("<h1 style='text-align:center'>🔷 GeoViG Multi-Task</h1>")

	with gr.Tabs():
	# Tab 1: image + variant selector -> classify() -> top-5 label widget.
	with gr.TabItem("🖼️ Classification"):
	with gr.Row():
	with gr.Column():
	cls_image = gr.Image(type="pil", label="Input Image")
	cls_variant = gr.Radio(["GeoViG-Ti", "GeoViG-S", "GeoViG-M", "GeoViG-B"], value="GeoViG-Ti")
	cls_btn = gr.Button("Predict", variant="primary")
	cls_output = gr.Label(num_top_classes=5)
	cls_btn.click(classify, [cls_image, cls_variant], cls_output)
	gr.Examples(
	examples=[["examples/n03394916_14162.JPEG", "GeoViG-Ti"], ["examples/n03417042_2960.JPEG", "GeoViG-S"]],
	inputs=[cls_image, cls_variant]
	)

	# Tab 2: image + backbone size + score threshold -> detect() -> two images
	# (boxes, masks).
	with gr.TabItem("📦 Natural Detection & Segmentation"):
	with gr.Row():
	with gr.Column():
	det_image = gr.Image(type="pil", label="Input Image")
	det_size = gr.Radio(["M", "B"], value="M", label="Backbone")
	det_thr = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Score Threshold")
	det_btn = gr.Button("Detect & Segment", variant="primary")
	with gr.Row():
	det_out = gr.Image(label="Bboxes", type="pil")
	seg_out = gr.Image(label="Masks", type="pil")
	det_btn.click(detect, [det_image, det_size, det_thr], [det_out, seg_out])
	gr.Examples(
	examples=[["examples/cat.png", "B", 0.8], ["examples/000000032081.jpg", "M", 0.7], ["examples/000000045472.jpg", "B", 0.7]],
	inputs=[det_image, det_size, det_thr]
	)

	# Tab 3: image + dataset + threshold -> segment_medical() -> one image.
	with gr.TabItem("🏥 Medical Detection & Segmentation"):
	with gr.Row():
	with gr.Column():
	med_image = gr.Image(type="pil", label="Anatomy Image")
	med_dataset = gr.Radio(["Kvasir-SEG", "DSB 2018"], value="Kvasir-SEG")
	med_med_thr = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Threshold")
	med_btn = gr.Button("Detect & Segment", variant="primary")
	med_out = gr.Image(label="Detection & Segmentation Result", type="pil")
	med_btn.click(segment_medical, [med_image, med_dataset, med_med_thr], med_out)
	gr.Examples(
	examples=[
	["examples/cju5y84q3mdv50817eyp82xf3.jpg", "Kvasir-SEG", 0.7],
	["examples/0c2550a23b8a0f29a7575de8c61690d3c31bc897dd5ba66caec201d201a278c2.png", "DSB 2018", 0.7]
	],
	inputs=[med_image, med_dataset, med_med_thr]
	)

	# Start the server on host 0.0.0.0, port 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860)