fomofo
/

tap-ct-b-2-5d

Safetensors

tapct

custom_code

Model card Files Files and versions

xet

Community

TimVeenboer committed on Jan 7

Commit

9be891b

1 Parent(s): 8650a91

feat(tap-hf): image processor

Browse files

Files changed (2) hide show

preprocessor_config.json +12 -0
tapct_processor.py +179 -0

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "image_processor_type": "TAPCTProcessor",
+    "resize_dims": [224, 224],
+    "divisible_pad_z": 1,
+    "clip_range": [-1008.0, 822.0],
+    "norm_mean": -86.80862426757812,
+    "norm_std": 322.63470458984375,
+    "auto_map": {
+        "AutoImageProcessor": "tapct_processor.TAPCTProcessor"
+    }
+}

tapct_processor.py ADDED Viewed

	@@ -0,0 +1,179 @@

+from typing import Union
+import numpy as np
+import torch
+import torch.nn.functional as F
+from transformers.image_processing_utils import BaseImageProcessor
class TAPCTProcessor(BaseImageProcessor):
    """
    Image processor for TAP-CT 3D volumes.

    Processes CT volumes with the following pipeline:

    1. Spatial Resizing: Resize to (z, H', W') where H', W' are resize_dims
    2. Axial Padding: Pad z-axis with -1024 HU for divisibility by patch size
    3. Intensity Clipping: Clip to HU range
    4. Normalization: Z-score normalization

    Padding runs before clipping, so padded voxels are clamped to
    ``clip_range[0]`` whenever that bound is above -1024.

    Parameters
    ----------
    resize_dims : tuple[int, int], default=(224, 224)
        Target spatial dimensions (H, W) for resizing.
    divisible_pad_z : int, default=4
        Pad the z-axis to be divisible by this value. Must be >= 1;
        a value of 1 disables padding.
    clip_range : tuple[float, float], default=(-1008.0, 822.0)
        HU intensity clipping range (min, max).
    norm_mean : float, default=-86.80862426757812
        Mean for z-score normalization.
    norm_std : float, default=322.63470458984375
        Standard deviation for z-score normalization. Must be non-zero.
    **kwargs
        Additional arguments passed to BaseImageProcessor.

    Raises
    ------
    ValueError
        If divisible_pad_z is smaller than 1 or norm_std is zero.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        resize_dims: tuple[int, int] = (224, 224),
        divisible_pad_z: int = 4,
        clip_range: tuple[float, float] = (-1008.0, 822.0),
        norm_mean: float = -86.80862426757812,
        norm_std: float = 322.63470458984375,
        **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.resize_dims = resize_dims
        self.divisible_pad_z = divisible_pad_z
        self.clip_range = clip_range
        self.norm_mean = norm_mean
        self.norm_std = norm_std
        # Fail at construction time rather than on the first volume.
        self._validate_config()

    def _validate_config(self) -> None:
        """
        Check the settings that would otherwise fail silently or obscurely.

        Raises
        ------
        ValueError
            If divisible_pad_z is smaller than 1 or norm_std is zero.
        """
        # 0 would raise ZeroDivisionError in the modulo; a negative value
        # would produce a negative pad amount, which F.pad treats as a crop.
        if self.divisible_pad_z < 1:
            raise ValueError(
                f"divisible_pad_z must be >= 1, got {self.divisible_pad_z}"
            )
        # Dividing by zero would fill the output with inf/NaN without any error.
        if self.norm_std == 0:
            raise ValueError("norm_std must be non-zero")

    def preprocess(
        self,
        images: Union[torch.Tensor, np.ndarray],
        return_tensors: str = "pt",
        **kwargs
    ) -> dict[str, torch.Tensor]:
        """
        Preprocess CT volumes.

        Parameters
        ----------
        images : torch.Tensor or np.ndarray
            Input tensor or numpy array of shape (B, C, D, H, W) where
            B=batch, C=channels, D=depth/slices, H=height, W=width.
        return_tensors : str, default="pt"
            Return format. Only "pt" (PyTorch) is supported.
        **kwargs
            Additional keyword arguments (unused).

        Returns
        -------
        dict[str, torch.Tensor]
            Dictionary with "pixel_values" containing processed tensor of shape
            (B, C, D', H', W') where D' may be padded for divisibility.

        Raises
        ------
        ValueError
            If return_tensors is not "pt", input is not 5D, or the processor
            configuration is invalid.
        TypeError
            If images is neither a torch.Tensor nor a np.ndarray.
        """
        if return_tensors != "pt":
            raise ValueError(f"Only 'pt' return_tensors is supported, got {return_tensors}")

        # Attributes may have been reassigned after construction; re-check.
        self._validate_config()

        if isinstance(images, np.ndarray):
            # torch.from_numpy rejects negatively-strided arrays (e.g. flipped
            # volumes), non-native byte order and some dtypes. Normalising to
            # a contiguous native float32 array first avoids all three; it is
            # a no-op for arrays that already have that layout.
            images = torch.from_numpy(np.ascontiguousarray(images, dtype=np.float32))
        elif not isinstance(images, torch.Tensor):
            raise TypeError(
                f"Expected torch.Tensor or np.ndarray, got {type(images).__name__}"
            )

        # Validate the shape before paying for a dtype conversion.
        if images.ndim != 5:
            raise ValueError(f"Expected 5D input (B, C, D, H, W), got shape {images.shape}")

        # Ensure float32 dtype for processing
        images = images.float()

        # Step 1: Spatial Resizing - resize H, W dimensions to resize_dims
        height, width = images.shape[-2:]
        target_h, target_w = self.resize_dims
        if height != target_h or width != target_w:
            images = self._resize_spatial(images, target_h, target_w)

        # Step 2: Axial Padding - pad z-axis with -1024 for divisibility
        images = self._pad_axial(images)

        # Step 3: Intensity Clipping - clip to HU range
        images = torch.clamp(images, min=self.clip_range[0], max=self.clip_range[1])

        # Step 4: Z-score Normalization
        images = (images - self.norm_mean) / self.norm_std

        return {"pixel_values": images}

    def _resize_spatial(
        self,
        images: torch.Tensor,
        target_h: int,
        target_w: int
    ) -> torch.Tensor:
        """
        Resize spatial dimensions (H, W) using trilinear interpolation.

        The current depth is passed through as the target depth, so only
        the H and W sizes change.

        Parameters
        ----------
        images : torch.Tensor
            Tensor of shape (B, C, D, H, W).
        target_h : int
            Target height.
        target_w : int
            Target width.

        Returns
        -------
        torch.Tensor
            Resized tensor of shape (B, C, D, target_h, target_w).
        """
        depth = images.shape[2]
        return F.interpolate(
            images,
            size=(depth, target_h, target_w),
            mode='trilinear',
            align_corners=False
        )

    def _pad_axial(self, images: torch.Tensor) -> torch.Tensor:
        """
        Pad the axial (z/depth) dimension with -1024 HU for divisibility.

        Padding is appended at the end of the depth axis only.

        Parameters
        ----------
        images : torch.Tensor
            Tensor of shape (B, C, D, H, W).

        Returns
        -------
        torch.Tensor
            Padded tensor of shape (B, C, D', H, W) where D' is divisible
            by divisible_pad_z. The input is returned unchanged when D
            already is.
        """
        remainder = images.shape[2] % self.divisible_pad_z
        if remainder == 0:
            return images

        pad_z = self.divisible_pad_z - remainder
        # F.pad expects padding in reverse dimension order: (W_l, W_r, H_l, H_r, D_l, D_r, ...)
        # To pad depth at the end: (0, 0, 0, 0, 0, pad_z)
        padding = (0, 0, 0, 0, 0, pad_z)
        return F.pad(images, padding, mode='constant', value=-1024.0)