Spaces:

sumitsingh830
/

SAM2-Image-Auto-Segment

Running

File size: 3,866 Bytes

import cv2
import numpy as np
import requests
from requests.exceptions import Timeout, RequestException
from skimage import measure


def load_image_from_url(url: str) -> np.ndarray:
    """
    Download an image over HTTP(S) and decode it into a BGR numpy array.

    Args:
        url: Image URL string

    Returns:
        BGR image as numpy array (decoded with cv2.IMREAD_COLOR)

    Raises:
        ValueError: If the response body is empty or cannot be decoded
            as an image
        Timeout: If the request times out
        requests.RequestException: If the request fails for any other
            reason (connection error, HTTP error status, ...)
    """
    # Only the network calls live inside the try block; decoding cannot
    # raise a requests exception, so it is handled separately below.
    try:
        # timeout is (connect_timeout, read_timeout) in seconds:
        # 10s to establish the connection, 60s to read the body.
        # The generous read timeout accommodates slow servers and large images.
        response = requests.get(url, timeout=(10, 60))
        response.raise_for_status()
    except Timeout as e:
        # Timeout is a subclass of RequestException, so it must be caught first.
        # Re-raise with a friendlier message, keeping the original request /
        # response objects and chaining the cause for debugging.
        raise Timeout(
            f"Request to {url} timed out. The server may be slow or unreachable. "
            f"Please try again or use a different image URL. Error: {str(e)}",
            request=e.request,
            response=e.response,
        ) from e
    except RequestException as e:
        raise RequestException(
            f"Failed to fetch image from URL: {url}. Error: {str(e)}",
            request=e.request,
            response=e.response,
        ) from e

    payload = response.content
    # An empty body can never be an image; reject it up front so the caller
    # gets the documented ValueError instead of an OpenCV-level error from
    # handing cv2.imdecode an empty buffer.
    if not payload:
        raise ValueError(f"Failed to decode image from URL: {url}")

    img = cv2.imdecode(np.frombuffer(payload, np.uint8), cv2.IMREAD_COLOR)
    # cv2.imdecode signals undecodable data by returning None.
    if img is None:
        raise ValueError(f"Failed to decode image from URL: {url}")
    return img


def mask_to_polygon(mask, scale_factors=(1.0, 1.0)):
    """
    Convert a binary mask to flat polygon coordinates (CVAT-style).

    The mask is binarized, lightly closed with a 3x3 elliptical kernel,
    and the largest external contour is simplified with cv2.approxPolyDP.

    Args:
        mask: Binary mask (array-like; non-uint8 input is cast to uint8).
            Values above 1 are thresholded at 127 to 0/255; a 0/1 mask is
            used as-is.
        scale_factors: Tuple (scale_x, scale_y). Each x coordinate is
            divided by scale_x and each y coordinate by scale_y to map
            from original image size to display size.

    Returns:
        List of float coordinates in CVAT format: [x1, y1, x2, y2, ...].
        Empty list if the mask is empty, has no foreground pixels, or the
        simplified contour has fewer than 3 points.
    """
    scale_x, scale_y = scale_factors

    mask = np.asarray(mask)

    # A zero-size array has no maximum (mask.max() would raise ValueError),
    # and it cannot contain any contour either.
    if mask.size == 0:
        return []

    # cv2.findContours needs an 8-bit image
    if mask.dtype != np.uint8:
        mask = mask.astype(np.uint8)

    # Normalize 0..255 style masks to strict 0/255; 0/1 masks are left alone
    # because findContours treats every non-zero pixel as foreground.
    if mask.max() > 1:
        mask = (mask > 127).astype(np.uint8) * 255

    # No foreground pixels means no contours; skip the OpenCV work entirely.
    if not mask.any():
        return []

    # Small morphological closing to connect nearby regions and fill
    # pinholes so the polygon covers the object completely.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=1)

    # Outer boundaries only (holes are ignored)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return []

    # Keep only the largest contour by area; smaller blobs are discarded
    largest_contour = max(contours, key=cv2.contourArea)

    # Adaptive simplification tolerance: 0.1% of the contour perimeter,
    # but never below 1.0 pixel.
    epsilon = max(1.0, cv2.arcLength(largest_contour, True) * 0.001)
    approx_contour = cv2.approxPolyDP(largest_contour, epsilon=epsilon, closed=True)

    # Fewer than 3 vertices cannot form a polygon
    if approx_contour.shape[0] < 3:
        return []

    # (N, 1, 2) integer points -> (N, 2) float array
    polygon = approx_contour.reshape(-1, 2).astype(float)

    # Map from original image coordinates to display coordinates by dividing
    # by the scale factors (the inverse of the bbox scaling direction).
    if scale_x != 1.0 or scale_y != 1.0:
        polygon[:, 0] = polygon[:, 0] / scale_x  # x coordinates: original -> display
        polygon[:, 1] = polygon[:, 1] / scale_y  # y coordinates: original -> display

    # Flatten to CVAT format: [x1, y1, x2, y2, ...]
    return polygon.flatten().tolist()