Spaces:

sumitsingh830
/

SAM2-Image-Auto-Segment

Running

SAM2-Image-Auto-Segment / model /utils.py

Singh

Fix: Replace obsolete libgl1-mesa-glx with libgl1 for Debian Trixie compatibility

1243014 4 months ago

3.87 kB

	import cv2
	import numpy as np
	import requests
	from requests.exceptions import Timeout, RequestException
	from skimage import measure


	def load_image_from_url(url: str):
	    """
	    Download an image over HTTP(S) and decode it into a BGR numpy array.

	    Args:
	        url: Image URL string. NOTE(review): the URL is fetched as-is; if it
	            can come from an untrusted user, validate it upstream to avoid
	            server-side request forgery.

	    Returns:
	        BGR image as numpy array (decoded with cv2.IMREAD_COLOR)

	    Raises:
	        ValueError: If the response body is empty or cannot be decoded
	        Timeout: If the request times out
	        requests.RequestException: If the URL request fails for any other
	            reason (including non-2xx status codes)
	    """
	    # Only the network calls live inside the try block so that decoding
	    # errors can never be mistaken for request failures.
	    try:
	        # Timeout tuple is (connect_timeout, read_timeout) in seconds:
	        # 10s to establish the connection, 60s between bytes received.
	        # Generous values to tolerate slow servers and large images.
	        response = requests.get(url, timeout=(10, 60))
	        response.raise_for_status()
	    except Timeout as e:
	        # Timeout is a RequestException subclass, so it must be handled first.
	        # Forward request/response and chain the cause so callers keep the
	        # full diagnostic context of the original failure.
	        raise Timeout(
	            f"Request to {url} timed out. The server may be slow or unreachable. "
	            f"Please try again or use a different image URL. Error: {str(e)}",
	            request=e.request,
	            response=e.response,
	        ) from e
	    except RequestException as e:
	        raise RequestException(
	            f"Failed to fetch image from URL: {url}. Error: {str(e)}",
	            request=e.request,
	            response=e.response,
	        ) from e

	    payload = response.content
	    if not payload:
	        # cv2.imdecode raises its own error on an empty buffer instead of
	        # returning None, so surface the documented ValueError explicitly.
	        raise ValueError(
	            f"Failed to decode image from URL: {url} (empty response body)"
	        )

	    img = cv2.imdecode(np.frombuffer(payload, np.uint8), cv2.IMREAD_COLOR)
	    if img is None:
	        raise ValueError(f"Failed to decode image from URL: {url}")
	    return img


	def mask_to_polygon(mask, scale_factors=(1.0, 1.0)):
	    """
	    Convert a binary mask to flat polygon coordinates (CVAT-style).

	    The mask is binarized, closed with a 3x3 elliptical kernel to bridge
	    small gaps, and the largest external contour is simplified with
	    cv2.approxPolyDP.

	    Args:
	        mask: Binary mask (array-like; uint8 0/255, 0/1 or boolean). Other
	            dtypes are cast to uint8 before thresholding. It is passed to
	            cv2.findContours, so it is expected to be a single-channel image.
	        scale_factors: Tuple (scale_x, scale_y). Every polygon coordinate is
	            DIVIDED by the matching factor, i.e. pass
	            original_size / display_size to map mask (original image)
	            coordinates to display coordinates.

	    Returns:
	        List of float coordinates in CVAT format: [x1, y1, x2, y2, ...].
	        An empty list is returned when the mask is empty, contains no
	        foreground, or the simplified contour has fewer than 3 vertices.

	    Raises:
	        ValueError: If either scale factor is zero.
	    """
	    scale_x, scale_y = scale_factors
	    if scale_x == 0 or scale_y == 0:
	        # Dividing by zero would silently produce inf/nan coordinates.
	        raise ValueError(
	            f"scale_factors must be non-zero, got ({scale_x}, {scale_y})"
	        )

	    mask = np.asarray(mask)
	    if mask.size == 0:
	        # mask.max() raises on zero-size arrays; there is nothing to trace.
	        return []

	    # cv2.findContours needs an 8-bit single-channel image.
	    if mask.dtype != np.uint8:
	        mask = mask.astype(np.uint8)

	    # Masks in the 0..255 range are thresholded to strict 0/255;
	    # 0/1 masks are left untouched (any non-zero pixel is foreground).
	    if mask.max() > 1:
	        mask = (mask > 127).astype(np.uint8) * 255

	    if not mask.any():
	        # No foreground pixels: closing cannot create any, so there would
	        # be no contour to find. Skip the OpenCV work entirely.
	        return []

	    # Small morphological closing to connect nearby regions so the outline
	    # covers the object as one piece.
	    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
	    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=1)

	    # Outer boundaries only; holes inside the object are ignored.
	    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    if not contours:
	        return []

	    # Keep only the largest contour by area; smaller fragments are dropped.
	    largest_contour = max(contours, key=cv2.contourArea)

	    # Adaptive simplification tolerance: 0.1% of the perimeter, but never
	    # below 1 pixel.
	    epsilon = max(1.0, cv2.arcLength(largest_contour, True) * 0.001)
	    approx_contour = cv2.approxPolyDP(largest_contour, epsilon=epsilon, closed=True)

	    # Fewer than three vertices cannot form a polygon.
	    if approx_contour.shape[0] < 3:
	        return []

	    # (N, 1, 2) contour -> (N, 2) float array of (x, y) points.
	    polygon = approx_contour.reshape(-1, 2).astype(float)

	    # Map original-image coordinates to display coordinates. Dividing by
	    # 1.0 is an exact no-op, so no special case is needed.
	    polygon[:, 0] /= scale_x
	    polygon[:, 1] /= scale_y

	    # Flatten to CVAT format: [x1, y1, x2, y2, ...]
	    return polygon.flatten().tolist()