Instructions to use Gertlek/DetectiveSAM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sam2
How to use Gertlek/DetectiveSAM with sam2:
# Use SAM2 with images
import torch
from sam2.sam2_image_predictor import SAM2ImagePredictor

predictor = SAM2ImagePredictor.from_pretrained("Gertlek/DetectiveSAM")

with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
    predictor.set_image(<your_image>)
    masks, _, _ = predictor.predict(<input_prompts>)

# Use SAM2 with videos
import torch
from sam2.sam2_video_predictor import SAM2VideoPredictor

predictor = SAM2VideoPredictor.from_pretrained("Gertlek/DetectiveSAM")

with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
    state = predictor.init_state(<your_video>)

    # add new prompts and instantly get the output on the same frame
    frame_idx, object_ids, masks = predictor.add_new_points(state, <your_prompts>)

    # propagate the prompts to get masklets throughout the video
    for frame_idx, object_ids, masks in predictor.propagate_in_video(state):
        ...
- Notebooks
- Google Colab
- Kaggle
File size: 2,759 Bytes
from __future__ import annotations
from pathlib import Path
import numpy as np
from PIL import Image, ImageOps
def ensure_uint8(mask: np.ndarray) -> np.ndarray:
    """Return *mask* as a ``uint8`` array.

    Conversion rules, checked in order:

    * boolean arrays map ``False``/``True`` to ``0``/``255``;
    * empty arrays are cast to ``uint8`` unchanged (there is no maximum to
      inspect);
    * arrays whose maximum is at most ``1.0`` are multiplied by 255;
    * everything else is cast to ``uint8`` without rescaling.

    Values are clipped to ``[0, 255]`` before the final cast so that
    out-of-range inputs saturate instead of wrapping around.

    Args:
        mask: Array to convert.

    Returns:
        A ``uint8`` array with the same shape as *mask*.
    """
    if mask.dtype == np.bool_:
        return mask.astype(np.uint8) * 255
    # ``mask.max()`` raises ValueError on a zero-size array, so handle it first.
    if mask.size == 0:
        return mask.astype(np.uint8)
    if mask.max() <= 1.0:
        return np.clip(mask * 255, 0, 255).astype(np.uint8)
    return np.clip(mask, 0, 255).astype(np.uint8)
def mask_to_image(mask: np.ndarray) -> Image.Image:
    """Build a single-channel ("L" mode) PIL image from *mask*.

    The array is passed through ``ensure_uint8`` before being handed to
    ``Image.fromarray``.
    """
    pixels = ensure_uint8(mask)
    return Image.fromarray(pixels, mode="L")
def probability_to_image(probability: np.ndarray) -> Image.Image:
    """Turn a probability map into an 8-bit grayscale ("L" mode) PIL image.

    Values are clamped to ``[0.0, 1.0]``, multiplied by 255 and cast to
    ``uint8`` (the cast drops the fractional part).
    """
    scaled = np.clip(probability, 0.0, 1.0) * 255
    gray = scaled.astype(np.uint8)
    return Image.fromarray(gray, mode="L")
def overlay_mask(
    image: Image.Image,
    mask: np.ndarray,
    color: tuple[int, int, int],
    alpha: float = 0.45,
) -> Image.Image:
    """Blend *color* into the pixels of *image* selected by *mask*.

    Args:
        image: Image to draw on; it is converted to RGB and not modified.
        mask: Array interpreted as boolean; truthy entries select the pixels
            to tint.
        color: RGB colour to blend in.
        alpha: Weight of *color* in the blend; the original pixel keeps
            weight ``1 - alpha``.

    Returns:
        A new RGB image with the tinted region.
    """
    # Work in float32 so the weighted sum is not truncated before clipping.
    pixels = np.array(image.convert("RGB"), dtype=np.float32)
    selected = mask.astype(bool)
    tint = np.array(color, dtype=np.float32)
    pixels[selected] = (1.0 - alpha) * pixels[selected] + alpha * tint
    blended = np.clip(pixels, 0, 255).astype(np.uint8)
    return Image.fromarray(blended, mode="RGB")
def concat_images(images: list[Image.Image]) -> Image.Image:
    """Paste *images* side by side, left to right, onto a white RGB canvas.

    The canvas is as wide as the sum of the image widths and as tall as the
    tallest image; every image is pasted with its top edge at ``y = 0``, so
    shorter images leave white space below them.

    Args:
        images: Images to concatenate; each is converted to RGB before
            pasting.

    Returns:
        The combined RGB image.

    Raises:
        ValueError: If *images* is empty.
    """
    # Without this guard the unpacking below fails with an opaque
    # "not enough values to unpack" ValueError.
    if not images:
        raise ValueError("concat_images requires at least one image")

    widths, heights = zip(*(image.size for image in images))
    canvas = Image.new("RGB", (sum(widths), max(heights)), color=(255, 255, 255))
    x_offset = 0
    for image, width in zip(images, widths):
        canvas.paste(image.convert("RGB"), (x_offset, 0))
        x_offset += width
    return canvas
def save_prediction_outputs(
    output_dir: str | Path,
    name: str,
    source_image: Image.Image,
    target_image: Image.Image,
    probability_map: np.ndarray,
    pred_mask: np.ndarray,
    gt_mask: np.ndarray | None = None,
) -> None:
    """Write the visualisations for one prediction as PNG files.

    Files written into *output_dir* (created if missing), all prefixed with
    *name*:

    * ``_probability.png`` - grayscale rendering of *probability_map*
    * ``_pred_mask.png`` - the predicted mask
    * ``_pred_overlay.png`` - *target_image* with the predicted mask in red
    * ``_gt_mask.png`` / ``_gt_overlay.png`` - only when *gt_mask* is given;
      the overlay uses green
    * ``_comparison.png`` - source, target, probability, prediction overlay
      and (if available) ground-truth overlay concatenated side by side

    Args:
        output_dir: Destination directory.
        name: Filename prefix shared by all outputs.
        source_image: First panel of the comparison strip.
        target_image: Second panel, and the base for both overlays.
        probability_map: Array rendered via ``probability_to_image``.
        pred_mask: Predicted mask array.
        gt_mask: Optional ground-truth mask array.
    """
    out_root = Path(output_dir)
    out_root.mkdir(parents=True, exist_ok=True)

    prob_img = probability_to_image(probability_map)
    mask_img = mask_to_image(pred_mask)
    red_overlay = overlay_mask(target_image, pred_mask, color=(255, 0, 0))

    # Panels of the side-by-side comparison strip, in display order.
    panels = [
        source_image.convert("RGB"),
        target_image.convert("RGB"),
        ImageOps.colorize(prob_img, black="black", white="white").convert("RGB"),
        red_overlay,
    ]

    prob_img.save(out_root / f"{name}_probability.png")
    mask_img.save(out_root / f"{name}_pred_mask.png")
    red_overlay.save(out_root / f"{name}_pred_overlay.png")

    if gt_mask is not None:
        truth_img = mask_to_image(gt_mask)
        green_overlay = overlay_mask(target_image, gt_mask, color=(0, 255, 0))
        truth_img.save(out_root / f"{name}_gt_mask.png")
        green_overlay.save(out_root / f"{name}_gt_overlay.png")
        panels.append(green_overlay)

    strip = concat_images(panels)
    strip.save(out_root / f"{name}_comparison.png")
|