added mask support, bug fixes
Browse files- image_processing_eye_gpu.py +440 -151
image_processing_eye_gpu.py
CHANGED
|
@@ -39,7 +39,11 @@ except ImportError:
|
|
| 39 |
# =============================================================================
|
| 40 |
|
| 41 |
def _pil_to_tensor(image: "Image.Image") -> torch.Tensor:
|
| 42 |
-
"""Convert PIL Image to tensor (C, H, W) in [0, 1].
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if not PIL_AVAILABLE:
|
| 44 |
raise ImportError("PIL is required to process PIL Images")
|
| 45 |
|
|
@@ -63,7 +67,11 @@ def _pil_to_tensor(image: "Image.Image") -> torch.Tensor:
|
|
| 63 |
|
| 64 |
|
| 65 |
def _numpy_to_tensor(arr: "np.ndarray") -> torch.Tensor:
|
| 66 |
-
"""Convert numpy array to tensor (C, H, W) in [0, 1].
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
if not NUMPY_AVAILABLE:
|
| 68 |
raise ImportError("NumPy is required to process numpy arrays")
|
| 69 |
|
|
@@ -89,8 +97,14 @@ def standardize_input(
|
|
| 89 |
images: Union[torch.Tensor, List[torch.Tensor], "Image.Image", List["Image.Image"], "np.ndarray", List["np.ndarray"]],
|
| 90 |
device: Optional[torch.device] = None,
|
| 91 |
) -> torch.Tensor:
|
| 92 |
-
"""
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
Args:
|
| 96 |
images: Input as:
|
|
@@ -151,18 +165,89 @@ def standardize_input(
|
|
| 151 |
|
| 152 |
return images
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
"""
|
| 157 |
-
Convert RGB images to grayscale using luminance formula.
|
| 158 |
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
Args:
|
| 162 |
-
images: Tensor of shape (B, 3, H, W)
|
| 163 |
|
| 164 |
Returns:
|
| 165 |
-
Tensor of shape (B, 1, H, W)
|
| 166 |
"""
|
| 167 |
# Luminance weights
|
| 168 |
weights = torch.tensor([0.299, 0.587, 0.114], device=images.device, dtype=images.dtype)
|
|
@@ -177,11 +262,15 @@ def rgb_to_grayscale(images: torch.Tensor) -> torch.Tensor:
|
|
| 177 |
# =============================================================================
|
| 178 |
|
| 179 |
def create_sobel_kernels(device: torch.device, dtype: torch.dtype) -> tuple:
|
| 180 |
-
"""
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
Returns:
|
| 184 |
-
Tuple of (sobel_x, sobel_y) kernels, each of shape (1, 1, 3, 3)
|
|
|
|
| 185 |
"""
|
| 186 |
sobel_x = torch.tensor([
|
| 187 |
[-1, 0, 1],
|
|
@@ -199,14 +288,16 @@ def create_sobel_kernels(device: torch.device, dtype: torch.dtype) -> tuple:
|
|
| 199 |
|
| 200 |
|
| 201 |
def compute_gradients(grayscale: torch.Tensor) -> tuple:
|
| 202 |
-
"""
|
| 203 |
-
|
|
|
|
| 204 |
|
| 205 |
Args:
|
| 206 |
-
grayscale:
|
| 207 |
|
| 208 |
Returns:
|
| 209 |
-
Tuple of (grad_x, grad_y, grad_magnitude)
|
|
|
|
| 210 |
"""
|
| 211 |
sobel_x, sobel_y = create_sobel_kernels(grayscale.device, grayscale.dtype)
|
| 212 |
|
|
@@ -226,20 +317,28 @@ def compute_radial_symmetry_response(
|
|
| 226 |
grad_y: torch.Tensor,
|
| 227 |
grad_magnitude: torch.Tensor,
|
| 228 |
) -> torch.Tensor:
|
| 229 |
-
"""
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
2. Have strong radial gradients pointing inward
|
| 235 |
|
| 236 |
Args:
|
| 237 |
-
grayscale: Grayscale
|
| 238 |
-
grad_x,
|
| 239 |
-
|
|
|
|
| 240 |
|
| 241 |
Returns:
|
| 242 |
-
|
| 243 |
"""
|
| 244 |
B, _, H, W = grayscale.shape
|
| 245 |
device = grayscale.device
|
|
@@ -311,15 +410,21 @@ def compute_radial_symmetry_response(
|
|
| 311 |
|
| 312 |
|
| 313 |
def soft_argmax_2d(response: torch.Tensor, temperature: float = 0.1) -> tuple:
|
| 314 |
-
"""
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
Args:
|
| 318 |
-
response: Response map (B, 1, H, W)
|
| 319 |
-
temperature: Softmax temperature
|
| 320 |
|
| 321 |
Returns:
|
| 322 |
-
Tuple of (cx, cy) each of shape (B,)
|
| 323 |
"""
|
| 324 |
B, _, H, W = response.shape
|
| 325 |
device = response.device
|
|
@@ -347,17 +452,19 @@ def estimate_eye_center(
|
|
| 347 |
images: torch.Tensor,
|
| 348 |
softmax_temperature: float = 0.1,
|
| 349 |
) -> tuple:
|
| 350 |
-
"""
|
| 351 |
-
|
|
|
|
| 352 |
|
| 353 |
Args:
|
| 354 |
-
images: RGB images of shape (B, 3, H, W)
|
| 355 |
-
softmax_temperature: Temperature for
|
| 356 |
-
|
| 357 |
-
|
|
|
|
| 358 |
|
| 359 |
Returns:
|
| 360 |
-
Tuple of (cx, cy) each of shape (B,) in pixel coordinates
|
| 361 |
"""
|
| 362 |
grayscale = rgb_to_grayscale(images)
|
| 363 |
grad_x, grad_y, grad_magnitude = compute_gradients(grayscale)
|
|
@@ -380,19 +487,28 @@ def estimate_radius(
|
|
| 380 |
min_radius_frac: float = 0.1,
|
| 381 |
max_radius_frac: float = 0.5,
|
| 382 |
) -> torch.Tensor:
|
| 383 |
-
"""
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
Args:
|
| 387 |
-
images: RGB images (B, 3, H, W)
|
| 388 |
-
cx, cy: Center coordinates (B,)
|
| 389 |
-
num_radii: Number of
|
| 390 |
-
num_angles: Number of angular
|
| 391 |
-
min_radius_frac: Minimum radius as fraction of
|
| 392 |
-
max_radius_frac: Maximum radius as fraction of
|
| 393 |
|
| 394 |
Returns:
|
| 395 |
-
Estimated radius for each image (B,)
|
| 396 |
"""
|
| 397 |
B, _, H, W = images.shape
|
| 398 |
device = images.device
|
|
@@ -472,18 +588,26 @@ def compute_crop_box(
|
|
| 472 |
scale_factor: float = 1.1,
|
| 473 |
allow_overflow: bool = False,
|
| 474 |
) -> tuple:
|
| 475 |
-
"""
|
| 476 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
Args:
|
| 479 |
-
cx, cy:
|
| 480 |
-
radius: Estimated radius (B,)
|
| 481 |
-
H, W:
|
| 482 |
-
scale_factor:
|
| 483 |
-
allow_overflow:
|
| 484 |
|
| 485 |
Returns:
|
| 486 |
-
Tuple of (x1, y1, x2, y2) each of shape (B,)
|
| 487 |
"""
|
| 488 |
# Compute half side length
|
| 489 |
half_side = radius * scale_factor
|
|
@@ -536,19 +660,23 @@ def batch_crop_and_resize(
|
|
| 536 |
output_size: int,
|
| 537 |
padding_mode: str = 'border',
|
| 538 |
) -> torch.Tensor:
|
| 539 |
-
"""
|
| 540 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
|
| 542 |
Args:
|
| 543 |
-
images: Input images (B, C, H, W)
|
| 544 |
-
x1, y1, x2, y2: Crop
|
| 545 |
-
output_size:
|
| 546 |
-
padding_mode:
|
| 547 |
-
- 'border': repeat edge pixels (default)
|
| 548 |
-
- 'zeros': fill with black (useful for pre-cropped images)
|
| 549 |
|
| 550 |
Returns:
|
| 551 |
-
Cropped and resized images (B, C, output_size, output_size)
|
| 552 |
"""
|
| 553 |
B, C, H, W = images.shape
|
| 554 |
device = images.device
|
|
@@ -584,13 +712,101 @@ def batch_crop_and_resize(
|
|
| 584 |
|
| 585 |
return cropped
|
| 586 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
|
| 588 |
# =============================================================================
|
| 589 |
# PHASE 4: CLAHE (Torch-Native)
|
| 590 |
# =============================================================================
|
| 591 |
|
| 592 |
def _srgb_to_linear(rgb: torch.Tensor) -> torch.Tensor:
|
| 593 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 594 |
threshold = 0.04045
|
| 595 |
linear = torch.where(
|
| 596 |
rgb <= threshold,
|
|
@@ -601,7 +817,11 @@ def _srgb_to_linear(rgb: torch.Tensor) -> torch.Tensor:
|
|
| 601 |
|
| 602 |
|
| 603 |
def _linear_to_srgb(linear: torch.Tensor) -> torch.Tensor:
|
| 604 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
threshold = 0.0031308
|
| 606 |
srgb = torch.where(
|
| 607 |
linear <= threshold,
|
|
@@ -612,21 +832,26 @@ def _linear_to_srgb(linear: torch.Tensor) -> torch.Tensor:
|
|
| 612 |
|
| 613 |
|
| 614 |
def rgb_to_lab(images: torch.Tensor) -> tuple:
|
| 615 |
-
"""
|
| 616 |
-
|
|
|
|
|
|
|
| 617 |
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
|
|
|
|
|
|
| 622 |
|
| 623 |
Args:
|
| 624 |
-
images: RGB images (B,
|
| 625 |
|
| 626 |
Returns:
|
| 627 |
-
Tuple of (L, a,
|
| 628 |
-
- L:
|
| 629 |
-
- a
|
|
|
|
| 630 |
"""
|
| 631 |
device = images.device
|
| 632 |
dtype = images.dtype
|
|
@@ -679,15 +904,18 @@ def rgb_to_lab(images: torch.Tensor) -> tuple:
|
|
| 679 |
|
| 680 |
|
| 681 |
def lab_to_rgb(L: torch.Tensor, a: torch.Tensor, b_ch: torch.Tensor) -> torch.Tensor:
|
| 682 |
-
"""
|
| 683 |
-
|
|
|
|
|
|
|
| 684 |
|
| 685 |
Args:
|
| 686 |
-
L:
|
| 687 |
-
a
|
|
|
|
| 688 |
|
| 689 |
Returns:
|
| 690 |
-
|
| 691 |
"""
|
| 692 |
# Denormalize
|
| 693 |
L_lab = L * 100.0
|
|
@@ -735,15 +963,22 @@ def compute_histogram(
|
|
| 735 |
tensor: torch.Tensor,
|
| 736 |
num_bins: int = 256,
|
| 737 |
) -> torch.Tensor:
|
| 738 |
-
"""
|
| 739 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 740 |
|
| 741 |
Args:
|
| 742 |
-
tensor: Input
|
| 743 |
-
num_bins: Number of histogram bins
|
| 744 |
|
| 745 |
Returns:
|
| 746 |
-
Histograms (B, num_bins)
|
| 747 |
"""
|
| 748 |
B = tensor.shape[0]
|
| 749 |
device = tensor.device
|
|
@@ -770,16 +1005,21 @@ def clahe_single_tile(
|
|
| 770 |
clip_limit: float,
|
| 771 |
num_bins: int = 256,
|
| 772 |
) -> torch.Tensor:
|
| 773 |
-
"""
|
| 774 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 775 |
|
| 776 |
Args:
|
| 777 |
-
tile:
|
| 778 |
-
clip_limit:
|
| 779 |
-
num_bins: Number of histogram bins
|
| 780 |
|
| 781 |
Returns:
|
| 782 |
-
CDF lookup table (B, num_bins)
|
| 783 |
"""
|
| 784 |
B, _, tile_h, tile_w = tile.shape
|
| 785 |
device = tile.device
|
|
@@ -815,17 +1055,30 @@ def apply_clahe_vectorized(
|
|
| 815 |
clip_limit: float = 2.0,
|
| 816 |
num_bins: int = 256,
|
| 817 |
) -> torch.Tensor:
|
| 818 |
-
"""
|
| 819 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 820 |
|
| 821 |
Args:
|
| 822 |
-
images: Input images (B, C, H, W)
|
| 823 |
-
grid_size:
|
| 824 |
-
clip_limit:
|
| 825 |
-
num_bins: Number of histogram bins
|
| 826 |
|
| 827 |
Returns:
|
| 828 |
-
CLAHE-enhanced images (B, C, H, W)
|
| 829 |
"""
|
| 830 |
B, C, H, W = images.shape
|
| 831 |
device = images.device
|
|
@@ -955,17 +1208,17 @@ def resize_images(
|
|
| 955 |
mode: str = 'bilinear',
|
| 956 |
antialias: bool = True,
|
| 957 |
) -> torch.Tensor:
|
| 958 |
-
"""
|
| 959 |
-
Resize images to target size.
|
| 960 |
|
| 961 |
Args:
|
| 962 |
-
images: Input images (B, C, H, W)
|
| 963 |
-
size: Target
|
| 964 |
-
mode: Interpolation mode
|
| 965 |
-
|
|
|
|
| 966 |
|
| 967 |
Returns:
|
| 968 |
-
Resized images (B, C, size, size)
|
| 969 |
"""
|
| 970 |
return F.interpolate(
|
| 971 |
images,
|
|
@@ -982,17 +1235,17 @@ def normalize_images(
|
|
| 982 |
std: Optional[List[float]] = None,
|
| 983 |
mode: str = 'imagenet',
|
| 984 |
) -> torch.Tensor:
|
| 985 |
-
"""
|
| 986 |
-
Normalize images.
|
| 987 |
|
| 988 |
Args:
|
| 989 |
-
images: Input images (B, C, H, W) in [0, 1]
|
| 990 |
-
mean:
|
| 991 |
-
std:
|
| 992 |
-
mode: 'imagenet', 'none', or
|
|
|
|
| 993 |
|
| 994 |
Returns:
|
| 995 |
-
|
| 996 |
"""
|
| 997 |
if mode == 'none':
|
| 998 |
return images
|
|
@@ -1020,16 +1273,30 @@ def normalize_images(
|
|
| 1020 |
# =============================================================================
|
| 1021 |
|
| 1022 |
class EyeCLAHEImageProcessor(BaseImageProcessor):
|
| 1023 |
-
"""
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
1.
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
"""
|
| 1034 |
|
| 1035 |
model_input_names = ["pixel_values"]
|
|
@@ -1092,30 +1359,40 @@ class EyeCLAHEImageProcessor(BaseImageProcessor):
|
|
| 1092 |
def preprocess(
|
| 1093 |
self,
|
| 1094 |
images,
|
|
|
|
| 1095 |
return_tensors: str = "pt",
|
| 1096 |
device: Optional[Union[str, torch.device]] = None,
|
| 1097 |
**kwargs,
|
| 1098 |
) -> BatchFeature:
|
| 1099 |
-
"""
|
| 1100 |
-
|
|
|
|
|
|
|
|
|
|
| 1101 |
|
| 1102 |
Args:
|
| 1103 |
-
images: Input images in any
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
-
|
| 1107 |
-
|
| 1108 |
-
|
|
|
|
|
|
|
|
|
|
| 1109 |
|
| 1110 |
Returns:
|
| 1111 |
-
BatchFeature with keys:
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
|
|
|
|
|
|
|
|
|
|
| 1119 |
"""
|
| 1120 |
if return_tensors != "pt":
|
| 1121 |
raise ValueError("Only 'pt' (PyTorch) tensors are supported")
|
|
@@ -1133,6 +1410,9 @@ class EyeCLAHEImageProcessor(BaseImageProcessor):
|
|
| 1133 |
|
| 1134 |
# Standardize input
|
| 1135 |
images = standardize_input(images, device)
|
|
|
|
|
|
|
|
|
|
| 1136 |
B, C, H_orig, W_orig = images.shape
|
| 1137 |
|
| 1138 |
if self.do_crop:
|
|
@@ -1164,6 +1444,13 @@ class EyeCLAHEImageProcessor(BaseImageProcessor):
|
|
| 1164 |
# Use 'zeros' padding when allow_overflow is True to fill out-of-bounds with black
|
| 1165 |
padding_mode = 'zeros' if self.allow_overflow else 'border'
|
| 1166 |
images = batch_crop_and_resize(images, x1, y1, x2, y2, self.size, padding_mode=padding_mode)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1167 |
else:
|
| 1168 |
# Just resize - no crop
|
| 1169 |
# Compute coordinate mapping for direct resize
|
|
@@ -1173,6 +1460,10 @@ class EyeCLAHEImageProcessor(BaseImageProcessor):
|
|
| 1173 |
offset_y = torch.zeros(B, device=device, dtype=images.dtype)
|
| 1174 |
images = resize_images(images, self.size)
|
| 1175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1176 |
# Apply CLAHE
|
| 1177 |
if self.do_clahe:
|
| 1178 |
images = apply_clahe_vectorized(
|
|
@@ -1190,25 +1481,23 @@ class EyeCLAHEImageProcessor(BaseImageProcessor):
|
|
| 1190 |
)
|
| 1191 |
|
| 1192 |
# Return with coordinate mapping information (flattened structure)
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
)
|
| 1203 |
|
| 1204 |
def __call__(
|
| 1205 |
self,
|
| 1206 |
images: Union[torch.Tensor, List[torch.Tensor]],
|
| 1207 |
**kwargs,
|
| 1208 |
) -> BatchFeature:
|
| 1209 |
-
"""
|
| 1210 |
-
Process images (alias for preprocess).
|
| 1211 |
-
"""
|
| 1212 |
return self.preprocess(images, **kwargs)
|
| 1213 |
|
| 1214 |
|
|
|
|
| 39 |
# =============================================================================
|
| 40 |
|
| 41 |
def _pil_to_tensor(image: "Image.Image") -> torch.Tensor:
|
| 42 |
+
"""Convert a single PIL Image to a float32 tensor of shape (C, H, W) in [0, 1].
|
| 43 |
+
|
| 44 |
+
Converts to RGB if not already. Uses numpy as intermediate when available,
|
| 45 |
+
otherwise falls back to manual pixel extraction.
|
| 46 |
+
"""
|
| 47 |
if not PIL_AVAILABLE:
|
| 48 |
raise ImportError("PIL is required to process PIL Images")
|
| 49 |
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
def _numpy_to_tensor(arr: "np.ndarray") -> torch.Tensor:
|
| 70 |
+
"""Convert a single numpy array to a float32 tensor of shape (C, H, W) in [0, 1].
|
| 71 |
+
|
| 72 |
+
Handles grayscale (H, W), HWC (H, W, C) with C in {1, 3, 4}, and uint8/float inputs.
|
| 73 |
+
Makes a copy to avoid sharing memory with the source array.
|
| 74 |
+
"""
|
| 75 |
if not NUMPY_AVAILABLE:
|
| 76 |
raise ImportError("NumPy is required to process numpy arrays")
|
| 77 |
|
|
|
|
| 97 |
images: Union[torch.Tensor, List[torch.Tensor], "Image.Image", List["Image.Image"], "np.ndarray", List["np.ndarray"]],
|
| 98 |
device: Optional[torch.device] = None,
|
| 99 |
) -> torch.Tensor:
|
| 100 |
+
"""Convert heterogeneous image inputs to a standardized (B, C, H, W) float32 tensor in [0, 1].
|
| 101 |
+
|
| 102 |
+
Accepts torch.Tensor, PIL.Image, numpy.ndarray, or lists thereof. Integer-typed
|
| 103 |
+
inputs (uint8) are scaled to [0, 1]. The output is clamped to [0, 1].
|
| 104 |
+
|
| 105 |
+
Note: All images in a list must have the same spatial dimensions (required by torch.stack).
|
| 106 |
+
A single numpy array with ndim==3 is treated as a single HWC image if the last dimension
|
| 107 |
+
is in {1, 3, 4}; otherwise it falls through to the tensor path (assumed CHW).
|
| 108 |
|
| 109 |
Args:
|
| 110 |
images: Input as:
|
|
|
|
| 165 |
|
| 166 |
return images
|
| 167 |
|
| 168 |
+
def standardize_mask_input(
|
| 169 |
+
masks: Union[
|
| 170 |
+
torch.Tensor,
|
| 171 |
+
List[torch.Tensor],
|
| 172 |
+
"Image.Image",
|
| 173 |
+
List["Image.Image"],
|
| 174 |
+
"np.ndarray",
|
| 175 |
+
List["np.ndarray"],
|
| 176 |
+
],
|
| 177 |
+
device: Optional[torch.device] = None,
|
| 178 |
+
) -> torch.Tensor:
|
| 179 |
+
"""Convert heterogeneous mask inputs to a standardized (B, 1, H, W) tensor.
|
| 180 |
|
| 181 |
+
Unlike ``standardize_input``, this preserves the original dtype (typically integer
|
| 182 |
+
label values) and does **not** normalize to [0, 1].
|
| 183 |
+
|
| 184 |
+
Accepts torch.Tensor, PIL.Image, numpy.ndarray, or lists thereof.
|
| 185 |
+
A single 2-D input is treated as (H, W) and expanded to (1, 1, H, W).
|
| 186 |
+
|
| 187 |
+
Args:
|
| 188 |
+
masks: Input masks in any supported format.
|
| 189 |
+
device: Target device.
|
| 190 |
+
|
| 191 |
+
Returns:
|
| 192 |
+
Tensor of shape (B, 1, H, W) with original dtype preserved.
|
| 193 |
"""
|
|
|
|
| 194 |
|
| 195 |
+
# Handle single inputs
|
| 196 |
+
if PIL_AVAILABLE and isinstance(masks, Image.Image):
|
| 197 |
+
masks = [masks]
|
| 198 |
+
|
| 199 |
+
if NUMPY_AVAILABLE and isinstance(masks, np.ndarray) and masks.ndim == 2:
|
| 200 |
+
masks = [masks]
|
| 201 |
+
|
| 202 |
+
# Convert list inputs
|
| 203 |
+
if isinstance(masks, list):
|
| 204 |
+
converted = []
|
| 205 |
+
for m in masks:
|
| 206 |
+
if PIL_AVAILABLE and isinstance(m, Image.Image):
|
| 207 |
+
# PIL mask → numpy → tensor
|
| 208 |
+
m = np.array(m)
|
| 209 |
+
converted.append(torch.from_numpy(m))
|
| 210 |
+
elif NUMPY_AVAILABLE and isinstance(m, np.ndarray):
|
| 211 |
+
converted.append(torch.from_numpy(m))
|
| 212 |
+
elif isinstance(m, torch.Tensor):
|
| 213 |
+
converted.append(m)
|
| 214 |
+
else:
|
| 215 |
+
raise TypeError(f"Unsupported mask type: {type(m)}")
|
| 216 |
+
|
| 217 |
+
masks = torch.stack(converted)
|
| 218 |
+
|
| 219 |
+
elif NUMPY_AVAILABLE and isinstance(masks, np.ndarray):
|
| 220 |
+
masks = torch.from_numpy(masks)
|
| 221 |
+
|
| 222 |
+
# At this point masks is a torch.Tensor
|
| 223 |
+
|
| 224 |
+
if masks.dim() == 2:
|
| 225 |
+
# (H, W) → (1, 1, H, W)
|
| 226 |
+
masks = masks.unsqueeze(0).unsqueeze(0)
|
| 227 |
+
elif masks.dim() == 3:
|
| 228 |
+
# (B, H, W) → (B, 1, H, W)
|
| 229 |
+
masks = masks.unsqueeze(1)
|
| 230 |
+
elif masks.dim() == 4:
|
| 231 |
+
# Assume already (B, C, H, W)
|
| 232 |
+
pass
|
| 233 |
+
else:
|
| 234 |
+
raise ValueError(f"Invalid mask shape: {masks.shape}")
|
| 235 |
+
|
| 236 |
+
# Move to device
|
| 237 |
+
if device is not None:
|
| 238 |
+
masks = masks.to(device)
|
| 239 |
+
|
| 240 |
+
return masks
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def rgb_to_grayscale(images: torch.Tensor) -> torch.Tensor:
|
| 244 |
+
"""Convert RGB images to grayscale via ITU-R BT.601 luminance: Y = 0.299R + 0.587G + 0.114B.
|
| 245 |
|
| 246 |
Args:
|
| 247 |
+
images: Tensor of shape (B, 3, H, W) in any value range.
|
| 248 |
|
| 249 |
Returns:
|
| 250 |
+
Tensor of shape (B, 1, H, W) in the same value range as input.
|
| 251 |
"""
|
| 252 |
# Luminance weights
|
| 253 |
weights = torch.tensor([0.299, 0.587, 0.114], device=images.device, dtype=images.dtype)
|
|
|
|
| 262 |
# =============================================================================
|
| 263 |
|
| 264 |
def create_sobel_kernels(device: torch.device, dtype: torch.dtype) -> tuple:
|
| 265 |
+
"""Create 3x3 Sobel edge-detection kernels for horizontal and vertical gradients.
|
| 266 |
+
|
| 267 |
+
Args:
|
| 268 |
+
device: Target device for the kernels.
|
| 269 |
+
dtype: Target dtype for the kernels.
|
| 270 |
|
| 271 |
Returns:
|
| 272 |
+
Tuple of (sobel_x, sobel_y) kernels, each of shape (1, 1, 3, 3),
|
| 273 |
+
suitable for use with ``F.conv2d`` on single-channel input.
|
| 274 |
"""
|
| 275 |
sobel_x = torch.tensor([
|
| 276 |
[-1, 0, 1],
|
|
|
|
| 288 |
|
| 289 |
|
| 290 |
def compute_gradients(grayscale: torch.Tensor) -> tuple:
|
| 291 |
+
"""Compute horizontal and vertical image gradients using 3x3 Sobel filters.
|
| 292 |
+
|
| 293 |
+
Uses reflect-free padding=1 (zero-padded convolution) to maintain spatial size.
|
| 294 |
|
| 295 |
Args:
|
| 296 |
+
grayscale: Single-channel images of shape (B, 1, H, W).
|
| 297 |
|
| 298 |
Returns:
|
| 299 |
+
Tuple of (grad_x, grad_y, grad_magnitude), each (B, 1, H, W).
|
| 300 |
+
``grad_magnitude`` = sqrt(grad_x^2 + grad_y^2 + 1e-8).
|
| 301 |
"""
|
| 302 |
sobel_x, sobel_y = create_sobel_kernels(grayscale.device, grayscale.dtype)
|
| 303 |
|
|
|
|
| 317 |
grad_y: torch.Tensor,
|
| 318 |
grad_magnitude: torch.Tensor,
|
| 319 |
) -> torch.Tensor:
|
| 320 |
+
"""Compute a radial-symmetry response map for circular-region detection.
|
| 321 |
+
|
| 322 |
+
The algorithm:
|
| 323 |
+
1. Estimates an initial center as the intensity-weighted center of mass of
|
| 324 |
+
dark regions (squared inverse intensity).
|
| 325 |
+
2. For each pixel, computes the dot product between the normalized gradient
|
| 326 |
+
vector and the unit vector pointing toward the estimated center.
|
| 327 |
+
3. Weights this alignment score by gradient magnitude and darkness.
|
| 328 |
+
4. Smooths the response with a separable Gaussian whose sigma is
|
| 329 |
+
proportional to the image size (kernel_size = max(H,W)//8, sigma = kernel_size/6).
|
| 330 |
|
| 331 |
+
High response indicates pixels whose gradients point radially inward toward
|
| 332 |
+
a dark center — characteristic of the fundus disc boundary.
|
|
|
|
| 333 |
|
| 334 |
Args:
|
| 335 |
+
grayscale: Grayscale images (B, 1, H, W) in [0, 1].
|
| 336 |
+
grad_x: Horizontal gradient (B, 1, H, W).
|
| 337 |
+
grad_y: Vertical gradient (B, 1, H, W).
|
| 338 |
+
grad_magnitude: Gradient magnitude (B, 1, H, W).
|
| 339 |
|
| 340 |
Returns:
|
| 341 |
+
Smoothed radial symmetry response map (B, 1, H, W).
|
| 342 |
"""
|
| 343 |
B, _, H, W = grayscale.shape
|
| 344 |
device = grayscale.device
|
|
|
|
| 410 |
|
| 411 |
|
| 412 |
def soft_argmax_2d(response: torch.Tensor, temperature: float = 0.1) -> tuple:
|
| 413 |
+
"""Find the sub-pixel peak location in a response map via softmax-weighted coordinates.
|
| 414 |
+
|
| 415 |
+
Divides the flattened response by ``temperature`` before applying softmax, then
|
| 416 |
+
computes the weighted mean of the (x, y) coordinate grids. Lower temperature yields
|
| 417 |
+
a sharper, more argmax-like result; higher temperature yields a broader average.
|
| 418 |
+
|
| 419 |
+
Caution: Very low temperatures (< 0.01) combined with large response magnitudes
|
| 420 |
+
can cause numerical overflow in the softmax exponential.
|
| 421 |
|
| 422 |
Args:
|
| 423 |
+
response: Response map (B, 1, H, W).
|
| 424 |
+
temperature: Softmax temperature. Default 0.1.
|
| 425 |
|
| 426 |
Returns:
|
| 427 |
+
Tuple of (cx, cy), each of shape (B,), in pixel coordinates.
|
| 428 |
"""
|
| 429 |
B, _, H, W = response.shape
|
| 430 |
device = response.device
|
|
|
|
| 452 |
images: torch.Tensor,
|
| 453 |
softmax_temperature: float = 0.1,
|
| 454 |
) -> tuple:
|
| 455 |
+
"""Estimate the center of the fundus/eye disc in each image.
|
| 456 |
+
|
| 457 |
+
Pipeline: RGB → grayscale → Sobel gradients → radial symmetry response → soft argmax.
|
| 458 |
|
| 459 |
Args:
|
| 460 |
+
images: RGB images of shape (B, 3, H, W) in [0, 1].
|
| 461 |
+
softmax_temperature: Temperature for the soft-argmax peak finder.
|
| 462 |
+
Lower values (0.01-0.1) give sharper localization; higher values
|
| 463 |
+
(0.3-0.5) give broader averaging, useful for noisy or low-contrast images.
|
| 464 |
+
Default 0.1.
|
| 465 |
|
| 466 |
Returns:
|
| 467 |
+
Tuple of (cx, cy), each of shape (B,), in pixel coordinates.
|
| 468 |
"""
|
| 469 |
grayscale = rgb_to_grayscale(images)
|
| 470 |
grad_x, grad_y, grad_magnitude = compute_gradients(grayscale)
|
|
|
|
| 487 |
min_radius_frac: float = 0.1,
|
| 488 |
max_radius_frac: float = 0.5,
|
| 489 |
) -> torch.Tensor:
|
| 490 |
+
"""Estimate the radius of the fundus disc by analyzing radial intensity profiles.
|
| 491 |
+
|
| 492 |
+
Samples grayscale intensity along ``num_angles`` rays emanating from ``(cx, cy)``
|
| 493 |
+
at ``num_radii`` radial distances. The per-radius mean intensity across all angles
|
| 494 |
+
gives a 1-D radial profile. The discrete derivative of this profile is linearly
|
| 495 |
+
weighted by radius (range 0.5–1.5) to bias toward the outer fundus boundary
|
| 496 |
+
rather than the smaller pupil boundary. The radius at the strongest weighted
|
| 497 |
+
negative gradient is selected as the disc edge.
|
| 498 |
+
|
| 499 |
+
Uses ``F.grid_sample`` with bilinear interpolation and border padding for
|
| 500 |
+
sub-pixel sampling.
|
| 501 |
|
| 502 |
Args:
|
| 503 |
+
images: RGB images (B, 3, H, W) in [0, 1].
|
| 504 |
+
cx, cy: Center coordinates (B,) in pixel units.
|
| 505 |
+
num_radii: Number of radial sample points. Default 100.
|
| 506 |
+
num_angles: Number of angular sample rays. Default 36.
|
| 507 |
+
min_radius_frac: Minimum search radius as fraction of min(H, W). Default 0.1.
|
| 508 |
+
max_radius_frac: Maximum search radius as fraction of min(H, W). Default 0.5.
|
| 509 |
|
| 510 |
Returns:
|
| 511 |
+
Estimated radius for each image (B,), clamped to [min_radius, max_radius].
|
| 512 |
"""
|
| 513 |
B, _, H, W = images.shape
|
| 514 |
device = images.device
|
|
|
|
| 588 |
scale_factor: float = 1.1,
|
| 589 |
allow_overflow: bool = False,
|
| 590 |
) -> tuple:
|
| 591 |
+
"""Compute a square bounding box centered on the detected eye.
|
| 592 |
+
|
| 593 |
+
The half-side length is ``radius * scale_factor``. When ``allow_overflow`` is
|
| 594 |
+
False, the box is clamped to the image bounds and then made square by shrinking
|
| 595 |
+
to the shorter side and re-centering. The resulting box is guaranteed to be
|
| 596 |
+
square and fully within [0, W-1] x [0, H-1].
|
| 597 |
+
|
| 598 |
+
When ``allow_overflow`` is True the raw (possibly out-of-bounds) box is
|
| 599 |
+
returned, which is useful for images where the fundus disc is partially
|
| 600 |
+
clipped; out-of-bounds regions will be zero-filled during grid_sample.
|
| 601 |
|
| 602 |
Args:
|
| 603 |
+
cx, cy: Detected eye center coordinates (B,).
|
| 604 |
+
radius: Estimated disc radius (B,).
|
| 605 |
+
H, W: Spatial dimensions of the source images.
|
| 606 |
+
scale_factor: Padding multiplier applied to ``radius``. Default 1.1.
|
| 607 |
+
allow_overflow: Skip clamping / squareness enforcement. Default False.
|
| 608 |
|
| 609 |
Returns:
|
| 610 |
+
Tuple of (x1, y1, x2, y2), each of shape (B,), in pixel coordinates.
|
| 611 |
"""
|
| 612 |
# Compute half side length
|
| 613 |
half_side = radius * scale_factor
|
|
|
|
| 660 |
output_size: int,
|
| 661 |
padding_mode: str = 'border',
|
| 662 |
) -> torch.Tensor:
|
| 663 |
+
"""Crop and resize images to a square using ``F.grid_sample`` (GPU-friendly).
|
| 664 |
+
|
| 665 |
+
Builds a regular output grid in [0, 1]^2, maps it to the source rectangle
|
| 666 |
+
[x1, x2] x [y1, y2] via affine scaling, normalizes to [-1, 1] for
|
| 667 |
+
``grid_sample``, and samples with bilinear interpolation (``align_corners=True``).
|
| 668 |
+
|
| 669 |
+
Crop coordinates may extend beyond image bounds; the ``padding_mode``
|
| 670 |
+
controls how out-of-bounds pixels are filled.
|
| 671 |
|
| 672 |
Args:
|
| 673 |
+
images: Input images (B, C, H, W).
|
| 674 |
+
x1, y1, x2, y2: Crop box corners (B,). May exceed [0, W-1] / [0, H-1].
|
| 675 |
+
output_size: Side length of the square output.
|
| 676 |
+
padding_mode: ``'border'`` (repeat edge, default) or ``'zeros'`` (black fill).
|
|
|
|
|
|
|
| 677 |
|
| 678 |
Returns:
|
| 679 |
+
Cropped and resized images (B, C, output_size, output_size).
|
| 680 |
"""
|
| 681 |
B, C, H, W = images.shape
|
| 682 |
device = images.device
|
|
|
|
| 712 |
|
| 713 |
return cropped
|
| 714 |
|
| 715 |
+
#def batch_crop_and_resize_mask(
|
| 716 |
+
# masks: torch.Tensor,
|
| 717 |
+
# x1: torch.Tensor,
|
| 718 |
+
# y1: torch.Tensor,
|
| 719 |
+
# x2: torch.Tensor,
|
| 720 |
+
# y2: torch.Tensor,
|
| 721 |
+
# output_size: int,
|
| 722 |
+
# padding_mode: str = "zeros",
|
| 723 |
+
#) -> torch.Tensor:
|
| 724 |
+
# """
|
| 725 |
+
# Crop and resize masks using nearest-neighbor sampling.
|
| 726 |
+
# """
|
| 727 |
+
# return batch_crop_and_resize(
|
| 728 |
+
# masks,
|
| 729 |
+
# x1, y1, x2, y2,
|
| 730 |
+
# output_size,
|
| 731 |
+
# padding_mode=padding_mode,
|
| 732 |
+
# )
|
| 733 |
+
|
| 734 |
+
def batch_crop_and_resize_mask(
|
| 735 |
+
masks: torch.Tensor, # (B, 1, H, W)
|
| 736 |
+
x1: torch.Tensor,
|
| 737 |
+
y1: torch.Tensor,
|
| 738 |
+
x2: torch.Tensor,
|
| 739 |
+
y2: torch.Tensor,
|
| 740 |
+
output_size: int,
|
| 741 |
+
padding_mode: str = "zeros",
|
| 742 |
+
) -> torch.Tensor:
|
| 743 |
+
"""Crop and resize segmentation masks using nearest-neighbor sampling.
|
| 744 |
+
|
| 745 |
+
Same spatial transform as ``batch_crop_and_resize`` but uses ``mode='nearest'``
|
| 746 |
+
to preserve discrete label values. The output is rounded and cast to ``torch.long``
|
| 747 |
+
to guard against floating-point drift in ``grid_sample``.
|
| 748 |
+
|
| 749 |
+
Args:
|
| 750 |
+
masks: Integer label masks (B, 1, H, W) — any dtype (converted to float internally).
|
| 751 |
+
x1, y1, x2, y2: Crop box corners (B,). May exceed image bounds.
|
| 752 |
+
output_size: Side length of the square output.
|
| 753 |
+
padding_mode: ``'zeros'`` (background = 0, default) or ``'border'`` (repeat edge).
|
| 754 |
+
|
| 755 |
+
Returns:
|
| 756 |
+
Cropped and resized masks (B, 1, output_size, output_size) as ``torch.long``.
|
| 757 |
+
"""
|
| 758 |
+
|
| 759 |
+
B, C, H, W = masks.shape
|
| 760 |
+
device = masks.device
|
| 761 |
+
|
| 762 |
+
# grid_sample requires floating point input
|
| 763 |
+
masks_f = masks.float()
|
| 764 |
+
|
| 765 |
+
# Create output grid in [0, 1]
|
| 766 |
+
coords = torch.linspace(0, 1, output_size, device=device)
|
| 767 |
+
out_y, out_x = torch.meshgrid(coords, coords, indexing="ij")
|
| 768 |
+
out_grid = torch.stack([out_x, out_y], dim=-1) # (S, S, 2)
|
| 769 |
+
out_grid = out_grid.unsqueeze(0).expand(B, -1, -1, -1)
|
| 770 |
+
|
| 771 |
+
# Reshape crop boxes
|
| 772 |
+
x1 = x1.view(B, 1, 1, 1)
|
| 773 |
+
y1 = y1.view(B, 1, 1, 1)
|
| 774 |
+
x2 = x2.view(B, 1, 1, 1)
|
| 775 |
+
y2 = y2.view(B, 1, 1, 1)
|
| 776 |
+
|
| 777 |
+
# Map [0, 1] → pixel coordinates
|
| 778 |
+
sample_x = x1 + out_grid[..., 0:1] * (x2 - x1)
|
| 779 |
+
sample_y = y1 + out_grid[..., 1:2] * (y2 - y1)
|
| 780 |
+
|
| 781 |
+
# Normalize to [-1, 1]
|
| 782 |
+
sample_x = 2.0 * sample_x / (W - 1) - 1.0
|
| 783 |
+
sample_y = 2.0 * sample_y / (H - 1) - 1.0
|
| 784 |
+
|
| 785 |
+
grid = torch.cat([sample_x, sample_y], dim=-1)
|
| 786 |
+
|
| 787 |
+
# Nearest-neighbor sampling with caller-specified padding
|
| 788 |
+
cropped = F.grid_sample(
|
| 789 |
+
masks_f,
|
| 790 |
+
grid,
|
| 791 |
+
mode="nearest",
|
| 792 |
+
padding_mode=padding_mode,
|
| 793 |
+
align_corners=True,
|
| 794 |
+
)
|
| 795 |
+
|
| 796 |
+
# Round before converting to handle floating point errors from grid_sample.
|
| 797 |
+
# Even with mode="nearest", grid_sample can produce values like 0.9999999
|
| 798 |
+
# which would truncate to 0 instead of rounding to 1.
|
| 799 |
+
return cropped.round().long()
|
| 800 |
|
| 801 |
# =============================================================================
|
| 802 |
# PHASE 4: CLAHE (Torch-Native)
|
| 803 |
# =============================================================================
|
| 804 |
|
| 805 |
def _srgb_to_linear(rgb: torch.Tensor) -> torch.Tensor:
|
| 806 |
+
"""Apply the sRGB electro-optical transfer function (EOTF) to convert sRGB to linear RGB.
|
| 807 |
+
|
| 808 |
+
Uses the IEC 61966-2-1 piecewise formula with threshold 0.04045.
|
| 809 |
+
"""
|
| 810 |
threshold = 0.04045
|
| 811 |
linear = torch.where(
|
| 812 |
rgb <= threshold,
|
|
|
|
| 817 |
|
| 818 |
|
| 819 |
def _linear_to_srgb(linear: torch.Tensor) -> torch.Tensor:
|
| 820 |
+
"""Apply the inverse sRGB EOTF to convert linear RGB to sRGB.
|
| 821 |
+
|
| 822 |
+
Uses the IEC 61966-2-1 piecewise formula with threshold 0.0031308.
|
| 823 |
+
Input must be non-negative; negative values will produce NaN from the power function.
|
| 824 |
+
"""
|
| 825 |
threshold = 0.0031308
|
| 826 |
srgb = torch.where(
|
| 827 |
linear <= threshold,
|
|
|
|
| 832 |
|
| 833 |
|
| 834 |
def rgb_to_lab(images: torch.Tensor) -> tuple:
|
| 835 |
+
"""Convert sRGB images to CIE LAB colour space (D65 illuminant).
|
| 836 |
+
|
| 837 |
+
Conversion chain: sRGB → linear RGB → CIE XYZ → CIE LAB.
|
| 838 |
+
The raw LAB values are rescaled for internal convenience:
|
| 839 |
|
| 840 |
+
- L ∈ [0, 100] → L / 100 → [0, 1]
|
| 841 |
+
- a ∈ ~[-128, 127] → a / 256 + 0.5 → ~[0, 1]
|
| 842 |
+
- b ∈ ~[-128, 127] → b / 256 + 0.5 → ~[0, 1]
|
| 843 |
+
|
| 844 |
+
These normalised values are **not** standard LAB; use ``lab_to_rgb`` to
|
| 845 |
+
invert them back to sRGB.
|
| 846 |
|
| 847 |
Args:
|
| 848 |
+
images: RGB images (B, 3, H, W) in [0, 1] sRGB.
|
| 849 |
|
| 850 |
Returns:
|
| 851 |
+
Tuple of (L, a, b_ch), each (B, 1, H, W):
|
| 852 |
+
- L: Normalised luminance in [0, 1].
|
| 853 |
+
- a: Normalised green–red chrominance, roughly [0, 1].
|
| 854 |
+
- b_ch: Normalised blue–yellow chrominance, roughly [0, 1].
|
| 855 |
"""
|
| 856 |
device = images.device
|
| 857 |
dtype = images.dtype
|
|
|
|
| 904 |
|
| 905 |
|
| 906 |
def lab_to_rgb(L: torch.Tensor, a: torch.Tensor, b_ch: torch.Tensor) -> torch.Tensor:
|
| 907 |
+
"""Convert normalised CIE LAB back to sRGB (inverse of ``rgb_to_lab``).
|
| 908 |
+
|
| 909 |
+
Denormalisation: L*100, (a-0.5)*256, (b_ch-0.5)*256, then LAB → XYZ → linear RGB → sRGB.
|
| 910 |
+
Output is clamped to [0, 1].
|
| 911 |
|
| 912 |
Args:
|
| 913 |
+
L: Normalised luminance (B, 1, H, W) in [0, 1].
|
| 914 |
+
a: Normalised green–red chrominance (B, 1, H, W), roughly [0, 1].
|
| 915 |
+
b_ch: Normalised blue–yellow chrominance (B, 1, H, W), roughly [0, 1].
|
| 916 |
|
| 917 |
Returns:
|
| 918 |
+
sRGB images (B, 3, H, W) clamped to [0, 1].
|
| 919 |
"""
|
| 920 |
# Denormalize
|
| 921 |
L_lab = L * 100.0
|
|
|
|
| 963 |
tensor: torch.Tensor,
|
| 964 |
num_bins: int = 256,
|
| 965 |
) -> torch.Tensor:
|
| 966 |
+
"""Compute per-image histograms for a batch of single-channel images.
|
| 967 |
+
|
| 968 |
+
Bins are uniformly spaced over [0, 1]. Each pixel is assigned to a bin via
|
| 969 |
+
``floor(value * (num_bins - 1))``, accumulated with ``scatter_add`` in a
|
| 970 |
+
per-sample loop.
|
| 971 |
+
|
| 972 |
+
Note: This function is used only by ``clahe_single_tile``.
|
| 973 |
+
The vectorized CLAHE path (``apply_clahe_vectorized``) computes histograms
|
| 974 |
+
inline for better GPU efficiency.
|
| 975 |
|
| 976 |
Args:
|
| 977 |
+
tensor: Input (B, 1, H, W) with values in [0, 1].
|
| 978 |
+
num_bins: Number of histogram bins. Default 256.
|
| 979 |
|
| 980 |
Returns:
|
| 981 |
+
Histograms of shape (B, num_bins), dtype matching input.
|
| 982 |
"""
|
| 983 |
B = tensor.shape[0]
|
| 984 |
device = tensor.device
|
|
|
|
| 1005 |
clip_limit: float,
|
| 1006 |
num_bins: int = 256,
|
| 1007 |
) -> torch.Tensor:
|
| 1008 |
+
"""Compute the clipped-and-redistributed CDF for a single CLAHE tile.
|
| 1009 |
+
|
| 1010 |
+
Clips the histogram so no bin exceeds ``clip_limit * num_pixels / num_bins``,
|
| 1011 |
+
redistributes the excess uniformly, then computes and min-max normalises the CDF.
|
| 1012 |
+
|
| 1013 |
+
Note: This function is not used by the main pipeline — see
|
| 1014 |
+
``apply_clahe_vectorized`` which processes all tiles in a single pass.
|
| 1015 |
|
| 1016 |
Args:
|
| 1017 |
+
tile: Single-channel tile images (B, 1, tile_h, tile_w) in [0, 1].
|
| 1018 |
+
clip_limit: Relative clip limit (higher = less contrast limiting).
|
| 1019 |
+
num_bins: Number of histogram bins. Default 256.
|
| 1020 |
|
| 1021 |
Returns:
|
| 1022 |
+
Normalised CDF lookup table (B, num_bins) in [0, 1].
|
| 1023 |
"""
|
| 1024 |
B, _, tile_h, tile_w = tile.shape
|
| 1025 |
device = tile.device
|
|
|
|
| 1055 |
clip_limit: float = 2.0,
|
| 1056 |
num_bins: int = 256,
|
| 1057 |
) -> torch.Tensor:
|
| 1058 |
+
"""Fully-vectorized CLAHE (Contrast Limited Adaptive Histogram Equalisation).
|
| 1059 |
+
|
| 1060 |
+
For RGB input, converts to CIE LAB, applies CLAHE to the L channel only,
|
| 1061 |
+
then converts back to sRGB. For single-channel input, operates directly.
|
| 1062 |
+
|
| 1063 |
+
Algorithm:
|
| 1064 |
+
1. Pads the luminance channel to be divisible by ``grid_size`` (reflect padding).
|
| 1065 |
+
2. Reshapes into ``grid_size x grid_size`` non-overlapping tiles.
|
| 1066 |
+
3. Computes a histogram per tile via ``scatter_add_`` (fully batched, no loops).
|
| 1067 |
+
4. Clips each histogram at ``clip_limit * num_pixels / num_bins`` and
|
| 1068 |
+
redistributes excess counts uniformly across all bins.
|
| 1069 |
+
5. Computes the cumulative distribution function (CDF) per tile and
|
| 1070 |
+
min-max normalises it to [0, 1].
|
| 1071 |
+
6. Maps each output pixel to the four surrounding tile centres and
|
| 1072 |
+
bilinearly interpolates their CDF values for a smooth result.
|
| 1073 |
|
| 1074 |
Args:
|
| 1075 |
+
images: Input images (B, C, H, W) in [0, 1]. C must be 1 or 3.
|
| 1076 |
+
grid_size: Tile grid resolution (tiles per axis). Default 8.
|
| 1077 |
+
clip_limit: Relative clip limit for histogram clipping. Default 2.0.
|
| 1078 |
+
num_bins: Number of histogram bins. Default 256.
|
| 1079 |
|
| 1080 |
Returns:
|
| 1081 |
+
CLAHE-enhanced images (B, C, H, W) in [0, 1].
|
| 1082 |
"""
|
| 1083 |
B, C, H, W = images.shape
|
| 1084 |
device = images.device
|
|
|
|
| 1208 |
mode: str = 'bilinear',
|
| 1209 |
antialias: bool = True,
|
| 1210 |
) -> torch.Tensor:
|
| 1211 |
+
"""Resize images to a square target size using ``F.interpolate``.
|
|
|
|
| 1212 |
|
| 1213 |
Args:
|
| 1214 |
+
images: Input images (B, C, H, W). Must be float for bilinear/bicubic modes.
|
| 1215 |
+
size: Target side length (output is always square).
|
| 1216 |
+
mode: Interpolation mode (``'bilinear'``, ``'bicubic'``, ``'nearest'``, etc.).
|
| 1217 |
+
Default ``'bilinear'``.
|
| 1218 |
+
antialias: Enable antialiasing for bilinear/bicubic downscaling. Default True.
|
| 1219 |
|
| 1220 |
Returns:
|
| 1221 |
+
Resized images (B, C, size, size).
|
| 1222 |
"""
|
| 1223 |
return F.interpolate(
|
| 1224 |
images,
|
|
|
|
| 1235 |
std: Optional[List[float]] = None,
|
| 1236 |
mode: str = 'imagenet',
|
| 1237 |
) -> torch.Tensor:
|
| 1238 |
+
"""Channel-wise normalisation: ``(image - mean) / std``.
|
|
|
|
| 1239 |
|
| 1240 |
Args:
|
| 1241 |
+
images: Input images (B, C, H, W) in [0, 1].
|
| 1242 |
+
mean: Per-channel means (length C). Required when ``mode='custom'``.
|
| 1243 |
+
std: Per-channel stds (length C). Required when ``mode='custom'``.
|
| 1244 |
+
mode: ``'imagenet'`` (uses ImageNet stats), ``'none'`` (identity), or
|
| 1245 |
+
``'custom'`` (uses caller-supplied mean/std). Default ``'imagenet'``.
|
| 1246 |
|
| 1247 |
Returns:
|
| 1248 |
+
Normalised images (B, C, H, W). Range depends on mean/std.
|
| 1249 |
"""
|
| 1250 |
if mode == 'none':
|
| 1251 |
return images
|
|
|
|
| 1273 |
# =============================================================================
|
| 1274 |
|
| 1275 |
class EyeCLAHEImageProcessor(BaseImageProcessor):
|
| 1276 |
+
"""GPU-native Hugging Face image processor for Colour Fundus Photography (CFP).
|
| 1277 |
+
|
| 1278 |
+
Processing pipeline (all steps optional via constructor flags):
|
| 1279 |
+
|
| 1280 |
+
1. **Eye localisation** (``do_crop=True``): detects the fundus disc centre via
|
| 1281 |
+
gradient-based radial symmetry (dark-region centre-of-mass → Sobel gradients →
|
| 1282 |
+
radial alignment score → Gaussian smoothing → soft argmax) and estimates the
|
| 1283 |
+
disc radius from the strongest negative radial intensity gradient.
|
| 1284 |
+
2. **Square crop & resize**: crops a square region around the detected disc
|
| 1285 |
+
(``radius * crop_scale_factor``), optionally allowing overflow beyond image
|
| 1286 |
+
bounds (``allow_overflow``), then resamples to ``size x size`` via bilinear
|
| 1287 |
+
``grid_sample``. When ``do_crop=False``, the whole image is resized directly.
|
| 1288 |
+
3. **CLAHE** (``do_clahe=True``): applies Contrast Limited Adaptive Histogram
|
| 1289 |
+
Equalisation to the CIE LAB luminance channel, using a fully-vectorized
|
| 1290 |
+
tile-based implementation with bilinear CDF interpolation.
|
| 1291 |
+
4. **Normalisation**: channel-wise ``(image - mean) / std`` with configurable
|
| 1292 |
+
mode (ImageNet, custom, or none).
|
| 1293 |
+
|
| 1294 |
+
The processor also returns per-image coordinate-mapping scalars (``scale_x/y``,
|
| 1295 |
+
``offset_x/y``) so that predictions in processed-image space can be mapped back
|
| 1296 |
+
to original pixel coordinates.
|
| 1297 |
+
|
| 1298 |
+
All operations are pure PyTorch — no OpenCV, PIL, or NumPy at runtime — and are
|
| 1299 |
+
CUDA-compatible and batch-friendly.
|
| 1300 |
"""
|
| 1301 |
|
| 1302 |
model_input_names = ["pixel_values"]
|
|
|
|
| 1359 |
def preprocess(
|
| 1360 |
self,
|
| 1361 |
images,
|
| 1362 |
+
masks=None,
|
| 1363 |
return_tensors: str = "pt",
|
| 1364 |
device: Optional[Union[str, torch.device]] = None,
|
| 1365 |
**kwargs,
|
| 1366 |
) -> BatchFeature:
|
| 1367 |
+
"""Run the full preprocessing pipeline on a batch of images.
|
| 1368 |
+
|
| 1369 |
+
Accepts any combination of torch.Tensor, PIL.Image, or numpy.ndarray inputs
|
| 1370 |
+
(see ``standardize_input`` for format details). Optionally processes
|
| 1371 |
+
accompanying segmentation masks with matching spatial transforms.
|
| 1372 |
|
| 1373 |
Args:
|
| 1374 |
+
images: Input images in any supported format.
|
| 1375 |
+
masks: Optional segmentation masks in any format accepted by
|
| 1376 |
+
``standardize_mask_input``. Undergo the same crop/resize as images
|
| 1377 |
+
(nearest-neighbour interpolation, label-preserving). Returned as
|
| 1378 |
+
``torch.long`` under the ``"mask"`` key (or ``None`` if not provided).
|
| 1379 |
+
return_tensors: Only ``"pt"`` is supported.
|
| 1380 |
+
device: Device for all tensor operations (e.g. ``"cuda:0"``).
|
| 1381 |
+
Defaults to the device of the input tensor, or CPU for PIL/numpy.
|
| 1382 |
+
**kwargs: Passed through to ``BaseImageProcessor``.
|
| 1383 |
|
| 1384 |
Returns:
|
| 1385 |
+
``BatchFeature`` with keys:
|
| 1386 |
+
|
| 1387 |
+
- ``pixel_values`` (B, 3, size, size): Processed float32 images.
|
| 1388 |
+
- ``mask`` (B, 1, size, size) or ``None``: Processed long masks.
|
| 1389 |
+
- ``scale_x``, ``scale_y`` (B,): Per-image scale factors.
|
| 1390 |
+
- ``offset_x``, ``offset_y`` (B,): Per-image offsets.
|
| 1391 |
+
|
| 1392 |
+
Coordinate mapping from processed → original pixel space::
|
| 1393 |
+
|
| 1394 |
+
orig_x = offset_x + proc_x * scale_x
|
| 1395 |
+
orig_y = offset_y + proc_y * scale_y
|
| 1396 |
"""
|
| 1397 |
if return_tensors != "pt":
|
| 1398 |
raise ValueError("Only 'pt' (PyTorch) tensors are supported")
|
|
|
|
| 1410 |
|
| 1411 |
# Standardize input
|
| 1412 |
images = standardize_input(images, device)
|
| 1413 |
+
if masks is not None:
|
| 1414 |
+
masks = standardize_mask_input(masks, device)
|
| 1415 |
+
|
| 1416 |
B, C, H_orig, W_orig = images.shape
|
| 1417 |
|
| 1418 |
if self.do_crop:
|
|
|
|
| 1444 |
# Use 'zeros' padding when allow_overflow is True to fill out-of-bounds with black
|
| 1445 |
padding_mode = 'zeros' if self.allow_overflow else 'border'
|
| 1446 |
images = batch_crop_and_resize(images, x1, y1, x2, y2, self.size, padding_mode=padding_mode)
|
| 1447 |
+
|
| 1448 |
+
if masks is not None:
|
| 1449 |
+
masks = batch_crop_and_resize_mask(
|
| 1450 |
+
masks, x1, y1, x2, y2,
|
| 1451 |
+
self.size,
|
| 1452 |
+
padding_mode=padding_mode,
|
| 1453 |
+
)
|
| 1454 |
else:
|
| 1455 |
# Just resize - no crop
|
| 1456 |
# Compute coordinate mapping for direct resize
|
|
|
|
| 1460 |
offset_y = torch.zeros(B, device=device, dtype=images.dtype)
|
| 1461 |
images = resize_images(images, self.size)
|
| 1462 |
|
| 1463 |
+
if masks is not None:
|
| 1464 |
+
# F.interpolate requires float input; cast, resize, then restore long
|
| 1465 |
+
masks = resize_images(masks.float(), self.size, mode="nearest", antialias=False).round().long()
|
| 1466 |
+
|
| 1467 |
# Apply CLAHE
|
| 1468 |
if self.do_clahe:
|
| 1469 |
images = apply_clahe_vectorized(
|
|
|
|
| 1481 |
)
|
| 1482 |
|
| 1483 |
# Return with coordinate mapping information (flattened structure)
|
| 1484 |
+
data = {
|
| 1485 |
+
"pixel_values": images,
|
| 1486 |
+
"scale_x": scale_x,
|
| 1487 |
+
"scale_y": scale_y,
|
| 1488 |
+
"offset_x": offset_x,
|
| 1489 |
+
"offset_y": offset_y,
|
| 1490 |
+
}
|
| 1491 |
+
if masks is not None:
|
| 1492 |
+
data["mask"] = masks
|
| 1493 |
+
return BatchFeature(data=data, tensor_type="pt")
|
| 1494 |
|
| 1495 |
def __call__(
    self,
    images: Union[torch.Tensor, List[torch.Tensor]],
    **kwargs,
) -> BatchFeature:
    """Make the processor directly callable.

    Delegates everything to :meth:`preprocess`, so ``processor(images, ...)``
    and ``processor.preprocess(images, ...)`` are interchangeable.
    """
    # Thin pass-through wrapper; all options are forwarded unchanged.
    return self.preprocess(images, **kwargs)
|
| 1502 |
|
| 1503 |
|