Upload ct_binary_coronary_segmentation/scripts/transforms.py with huggingface_hub

2e35511 verified about 1 month ago

15.4 kB

	"""Custom MONAI transforms for binary coronary artery segmentation."""

	import json

	import numpy as np
	from pathlib import Path
	from typing import Dict, Hashable, Mapping, Optional, Any

	import torch
	from monai import transforms
	from monai.config.type_definitions import KeysCollection, NdarrayOrTensor
	from monai.utils.enums import TransformBackends
	from scipy import ndimage


	class ApplyWindowing(transforms.Transform):
	    """
	    Clip image intensities to a CT window.

	    Windowing adapts the greyscale component of a CT image to highlight particular structures
	    by reducing the range of Hounsfield units (HU) to be displayed.

	    Exactly one of the following must be supplied: ``window``, or ``upper``
	    together with ``lower``, or ``width`` together with ``level``.

	    Args:
	        window: a string for preset windows (brain, subdural, stroke, temporal bone,
	            lungs, abdomen, liver, bone).
	        upper: upper threshold for windowing
	        lower: lower threshold for windowing
	        width: window width
	        level: window level (or window center)

	    Raises:
	        ValueError: if none or multiple of window/lower+upper/width+level are specified,
	            if a pair is only partially specified, or if ``window`` is not a known preset.
	    """

	    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]

	    # Preset name -> (width, level).
	    # NOTE(review): "lungs" uses a width of 150, far narrower than the ~1500
	    # usually quoted for lung windows. It looks like a typo, but it is kept
	    # as-is because changing it alters preprocessing — confirm before fixing.
	    _PRESETS = {
	        "brain": (80, 40),
	        "subdural": (130, 50),
	        "stroke": (8, 40),
	        "temporal bone": (2800, 700),
	        "lungs": (150, -600),
	        "abdomen": (400, 50),
	        "liver": (150, 30),
	        "bone": (1800, 400),
	    }

	    def __init__(
	        self,
	        window: Optional[str] = None,
	        upper: Optional[int] = None,
	        lower: Optional[int] = None,
	        width: Optional[int] = None,
	        level: Optional[int] = None,
	    ):
	        error_message = "Please specifiy either window or upper/lower or width/level."

	        # Compare against None instead of relying on truthiness: 0 is a valid
	        # threshold / level and must not be mistaken for "not provided".
	        bounds = (upper, lower)
	        width_level = (width, level)
	        uses_window = bool(window)
	        uses_bounds = any(value is not None for value in bounds)
	        uses_width_level = any(value is not None for value in width_level)

	        # Exactly one way of describing the window may be used ...
	        if uses_window + uses_bounds + uses_width_level != 1:
	            raise ValueError(error_message)
	        # ... and a pair must be given completely.
	        if uses_bounds and any(value is None for value in bounds):
	            raise ValueError(error_message)
	        if uses_width_level and any(value is None for value in width_level):
	            raise ValueError(error_message)

	        if uses_window:
	            if window not in self._PRESETS:
	                # Fail at construction time rather than with an opaque error
	                # from clip(None, None) on the first call.
	                raise ValueError(
	                    f"Unknown window preset {window!r}; "
	                    f"expected one of {sorted(self._PRESETS)}."
	                )
	            width, level = self._PRESETS[window]

	        if width is not None:
	            # Convert (width, level) to the clipping interval around the level.
	            upper = level + width // 2
	            lower = level - width // 2

	        self.upper = upper
	        self.lower = lower

	    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
	        """Return ``img`` with values clipped to ``[self.lower, self.upper]``."""
	        return img.clip(self.lower, self.upper)


	class ApplyWindowingd(transforms.MapTransform):
	    """Dictionary-based wrapper of :py:class:`ApplyWindowing`.

	    The windowing arguments are forwarded unchanged to
	    :py:class:`ApplyWindowing`; the resulting transform is applied to every
	    entry selected by ``keys``.
	    """

	    def __init__(
	        self,
	        keys: KeysCollection,
	        window: Optional[str] = None,
	        upper: Optional[int] = None,
	        lower: Optional[int] = None,
	        width: Optional[int] = None,
	        level: Optional[int] = None,
	        allow_missing_keys: bool = False,
	    ):
	        super().__init__(keys=keys, allow_missing_keys=allow_missing_keys)
	        window_kwargs = {
	            "window": window,
	            "upper": upper,
	            "lower": lower,
	            "width": width,
	            "level": level,
	        }
	        self.windowing = ApplyWindowing(**window_kwargs)

	    def __call__(
	        self, data: Mapping[Hashable, NdarrayOrTensor]
	    ) -> Dict[Hashable, NdarrayOrTensor]:
	        """Return a shallow copy of ``data`` with the selected entries windowed."""
	        windowed = dict(data)
	        for name in self.key_iterator(windowed):
	            source = windowed[name]
	            windowed[name] = self.windowing(source)
	        return windowed


	# =============================================================================
	# Normalization
	# =============================================================================


	def _to_numpy(img: "NdarrayOrTensor") -> np.ndarray:
	    """Convert tensor to numpy for percentile/statistics computation.

	    Torch tensors are detached and moved to the CPU before conversion;
	    anything else is passed through ``np.asarray``.
	    """
	    if isinstance(img, torch.Tensor):
	        # detach() first: Tensor.numpy() raises for tensors that require grad.
	        return img.detach().cpu().numpy()
	    return np.asarray(img)


	def _from_numpy(arr: np.ndarray, reference: "NdarrayOrTensor") -> "NdarrayOrTensor":
	    """Convert numpy back to the same type as reference, preserving MetaTensor metadata.

	    Args:
	        arr: Array holding the processed values.
	        reference: The object the values originally came from. It decides
	            the return type: a non-tensor reference returns ``arr`` itself,
	            a tensor reference returns a tensor on the reference's device.

	    Returns:
	        ``arr`` unchanged, a ``torch.Tensor``, or (when ``reference`` has a
	        ``meta`` attribute) a ``MetaTensor`` carrying the reference's
	        ``meta`` and a copy of its ``applied_operations`` list.
	    """
	    if not isinstance(reference, torch.Tensor):
	        return arr

	    result = torch.from_numpy(arr).to(reference.device)
	    if hasattr(reference, "meta"):
	        from monai.data import MetaTensor

	        # Building a MetaTensor with only ``meta=`` starts with an empty
	        # operation history; carry the reference's history over so that
	        # transforms applied before this point can still be inverted.
	        history = getattr(reference, "applied_operations", None)
	        result = MetaTensor(
	            result,
	            meta=reference.meta,
	            applied_operations=None if history is None else list(history),
	        )
	    return result


	class ZScoreForegroundNormalize(transforms.Transform):
	    """
	    Z-score normalization driven by foreground statistics.

	    Intended to run AFTER windowing. Mean and standard deviation are taken
	    from the voxels strictly above ``background_threshold`` (excluding
	    background/air) and then used to normalize the entire image. If no voxel
	    exceeds the threshold, the statistics of the whole image are used instead.
	    The result is always ``float32``.

	    Args:
	        background_threshold: Voxels at or below this value are treated as
	            background. After windowing to [-100, 900], -50 excludes
	            low-intensity regions.
	    """

	    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]

	    def __init__(self, background_threshold: float = -50) -> None:
	        self.background_threshold = background_threshold

	    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
	        """Return the normalized image in the same container type as ``img``."""
	        values = _to_numpy(img)
	        foreground = values > self.background_threshold
	        # Statistics come from the foreground when there is one, otherwise
	        # from every voxel.
	        sample = values[foreground] if foreground.sum() > 0 else values
	        center = sample.mean()
	        spread = sample.std()
	        # The small epsilon keeps the division finite for constant images.
	        normalized = (values - center) / (spread + 1e-8)
	        return _from_numpy(normalized.astype(np.float32), img)


	class ZScoreForegroundNormalized(transforms.MapTransform):
	    """Dictionary-based wrapper of :py:class:`ZScoreForegroundNormalize`.

	    ``background_threshold`` is forwarded unchanged; the normalizer is applied
	    to every entry selected by ``keys``.
	    """

	    def __init__(
	        self,
	        keys: KeysCollection,
	        background_threshold: float = -50,
	        allow_missing_keys: bool = False,
	    ) -> None:
	        super().__init__(keys=keys, allow_missing_keys=allow_missing_keys)
	        self.normalizer = ZScoreForegroundNormalize(background_threshold=background_threshold)

	    def __call__(
	        self, data: Mapping[Hashable, NdarrayOrTensor]
	    ) -> Dict[Hashable, NdarrayOrTensor]:
	        """Return a shallow copy of ``data`` with the selected entries normalized."""
	        normalized = dict(data)
	        for name in self.key_iterator(normalized):
	            source = normalized[name]
	            normalized[name] = self.normalizer(source)
	        return normalized


	# =============================================================================
	# Centerline extraction
	# =============================================================================


	def _get_neighbors(point, skel_arr):
	    """Return the 26-connected neighbors of ``point`` that are set in ``skel_arr``.

	    Candidates outside the array bounds are ignored. Neighbors are returned as
	    coordinate tuples, ordered with the first axis offset varying slowest and
	    the last axis offset varying fastest.
	    """
	    steps = (-1, 0, 1)
	    # All 3x3x3 offsets except the centre, which is the point itself.
	    offsets = [
	        (dx, dy, dz)
	        for dx in steps
	        for dy in steps
	        for dz in steps
	        if (dx, dy, dz) != (0, 0, 0)
	    ]

	    dims = skel_arr.shape
	    found = []
	    for dx, dy, dz in offsets:
	        candidate = (point[0] + dx, point[1] + dy, point[2] + dz)
	        inside = (
	            0 <= candidate[0] < dims[0]
	            and 0 <= candidate[1] < dims[1]
	            and 0 <= candidate[2] < dims[2]
	        )
	        if inside and skel_arr[candidate]:
	            found.append(candidate)
	    return found


	def _trace_branch(start, skel_arr, visited, branch_points):
	    """Walk along the skeleton from ``start`` and return the voxels in order.

	    At every step only neighbors that are not yet in ``visited`` are
	    considered. With several candidates the one best aligned with the last
	    step direction is taken (or simply the first candidate when the path has
	    fewer than two voxels). The walk ends at a dead end (no unvisited
	    neighbor) or right after stepping onto a voxel in ``branch_points``.

	    ``visited`` is updated in place with every voxel placed on the path,
	    including ``start``.

	    Returns:
	        Ordered list of voxel coordinate tuples, beginning with ``start``.
	    """
	    visited.add(start)
	    trail = [start]
	    here = start
	    while True:
	        options = [nb for nb in _get_neighbors(here, skel_arr) if nb not in visited]
	        if not options:
	            # Dead end: every skeleton neighbor has already been claimed.
	            return trail

	        if len(options) > 1 and len(trail) >= 2:
	            # Prefer the candidate whose offset has the largest projection
	            # onto the direction of the previous step.
	            heading = np.array(trail[-1]) - np.array(trail[-2])
	            alignment = [
	                np.dot(np.array(nb) - np.array(here), heading) for nb in options
	            ]
	            step = options[int(np.argmax(alignment))]
	        else:
	            step = options[0]

	        visited.add(step)
	        trail.append(step)
	        here = step
	        if here in branch_points:
	            return trail


	def _smooth_branch(points, affine, smoothing_factor=2.0):
	    """Map a voxel path to physical space and smooth it with a B-spline.

	    The spline is sampled at ``max(int(L), 4)`` evenly spaced parameter
	    values, where ``L`` is the polyline length of the transformed input
	    points (roughly one sample per mm when ``affine`` maps to mm).

	    Args:
	        points: List of (x, y, z) voxel coordinates.
	        affine: 4x4 affine matrix mapping voxel to physical (mm).
	        smoothing_factor: Spline smoothing (higher = smoother); the value
	            passed to the fit is ``len(points) * smoothing_factor``.

	    Returns:
	        List of [x, y, z] physical coordinates, rounded to 2 decimals. Paths
	        with fewer than 4 points, and paths for which the spline fit fails,
	        are returned transformed but unsmoothed.
	    """
	    from scipy.interpolate import splprep, splev

	    def _as_rounded_rows(coords):
	        return [[round(float(value), 2) for value in row] for row in coords]

	    voxels = np.array(points, dtype=float)

	    # Voxel -> physical via homogeneous coordinates.
	    homogeneous = np.hstack([voxels, np.ones((len(voxels), 1))])  # (N, 4)
	    world = (affine @ homogeneous.T).T[:, :3]  # (N, 3)
	    count = len(world)

	    if count < 4:
	        return _as_rounded_rows(world)

	    try:
	        spline, _ = splprep(
	            list(world.T),
	            s=count * smoothing_factor,
	            k=min(3, count - 1),
	        )
	        # Polyline length of the raw path decides how many samples to draw.
	        steps = np.diff(world, axis=0)
	        polyline_length = float(np.sum(np.sqrt(np.sum(steps ** 2, axis=1))))
	        samples = np.linspace(0, 1, max(int(polyline_length), 4))
	        resampled = np.array(splev(samples, spline)).T
	        return _as_rounded_rows(resampled)
	    except Exception:
	        # Best effort: fall back to the unsmoothed physical points.
	        return _as_rounded_rows(world)


	def extract_centerlines(binary_mask, affine, min_branch_points=3,
	                        min_length_mm=5.0, smoothing_factor=2.0):
	    """Extract vessel centerlines from a binary mask.

	    The mask is skeletonized, skeleton voxels are classified by their number
	    of 26-connected skeleton neighbors (1 = endpoint, >= 3 = branch point),
	    branches are traced between those voxels, and every branch is smoothed
	    and converted to physical coordinates.

	    Args:
	        binary_mask: 3D numpy array (bool or int). Singleton dimensions are
	            squeezed away first.
	        affine: 4x4 affine matrix (voxel to mm).
	        min_branch_points: Discard branches with fewer raw skeleton points.
	        min_length_mm: Discard branches shorter than this (mm) after smoothing.
	        smoothing_factor: Spline smoothing parameter.

	    Returns:
	        Dict with 'branches' list, each containing 'id', 'points_mm',
	        'length_mm', and 'n_points'. An all-zero mask yields an empty list.

	    Raises:
	        ValueError: if the non-empty mask is not 3D after squeezing.
	    """
	    from skimage.morphology import skeletonize

	    arr = np.asarray(binary_mask).squeeze().astype(bool)
	    if not arr.any():
	        return {"branches": []}
	    if arr.ndim != 3:
	        raise ValueError(
	            f"binary_mask must be 3D after squeezing, got shape {arr.shape}."
	        )

	    # Depending on the scikit-image version the 3D skeleton comes back as
	    # bool or as uint8 with values 0/255; normalise to bool so the neighbor
	    # counts below really count voxels.
	    skel = np.asarray(skeletonize(arr)).astype(bool)

	    # Classify skeleton voxels by neighbor count (26-connectivity). The full
	    # 3x3x3 structure includes the centre voxel, hence the subtraction.
	    struct = ndimage.generate_binary_structure(3, 3)
	    skel_int = skel.astype(np.int32)
	    neighbor_count = ndimage.convolve(
	        skel_int, struct.astype(np.int32), mode="constant"
	    ) - skel_int

	    endpoints = set(map(tuple, np.argwhere(skel & (neighbor_count == 1))))
	    branch_points = set(map(tuple, np.argwhere(skel & (neighbor_count >= 3))))
	    # NOTE(review): closed loops without endpoints or branch points are never
	    # used as a start voxel and therefore produce no branch.

	    # Trace branches starting from endpoints first, then branch points
	    visited = set()
	    raw_branches = []
	    for start in list(endpoints) + list(branch_points):
	        if start not in visited:
	            path = _trace_branch(start, skel, visited, branch_points)
	            if len(path) >= min_branch_points:
	                raw_branches.append(path)
	        # Explore the remaining directions of every branch point, including
	        # those an earlier trace already ended on: they are exactly the ones
	        # that still have untraced segments attached.
	        if start in branch_points:
	            for nb in _get_neighbors(start, skel):
	                if nb in visited:
	                    continue
	                offshoot = [start] + _trace_branch(nb, skel, visited, branch_points)
	                # The branch point itself counts as a raw point of the branch.
	                if len(offshoot) >= min_branch_points:
	                    raw_branches.append(offshoot)

	    # Smooth, convert to physical coordinates, and filter by length
	    affine_np = np.array(affine, dtype=float)
	    branches = []
	    for raw in raw_branches:
	        pts_mm = _smooth_branch(raw, affine_np, smoothing_factor)
	        if len(pts_mm) < 2:
	            continue
	        steps = np.diff(np.asarray(pts_mm, dtype=float), axis=0)
	        length = float(np.sum(np.sqrt(np.sum(steps ** 2, axis=1))))
	        if length < min_length_mm:
	            continue
	        branches.append({
	            # Ids are consecutive over the branches that survive filtering.
	            "id": len(branches),
	            "points_mm": pts_mm,
	            "length_mm": round(length, 2),
	            "n_points": len(pts_mm),
	        })

	    return {"branches": branches}


	class ExtractCenterlinesd(transforms.MapTransform):
	    """Extract vessel centerlines from binary mask and save as JSON.

	    Post-processing transform for inference. Takes the predicted binary mask,
	    extracts spline-smoothed centerlines, and writes a JSON file with
	    ordered branch points in physical (mm) coordinates. The input dictionary
	    entries themselves are returned unchanged.

	    Output file: ``{output_dir}/{patient_name}_centerline.json``

	    The file name depends only on the image entry, so when several ``keys``
	    are given each one writes to the same path and the last one wins.

	    Args:
	        keys: Key of the binary mask prediction (typically "pred").
	        image_key: Key of the input image (for filename extraction).
	        output_dir: Directory to write JSON files.
	        min_branch_points: Minimum raw skeleton points per branch.
	        min_length_mm: Discard branches shorter than this (mm).
	        smoothing_factor: B-spline smoothing (higher = smoother).
	    """

	    # Inner extensions left behind by ``Path.stem`` for compressed files such
	    # as ``case.nii.gz``.
	    _INNER_SUFFIXES = (".nii", ".nrrd", ".dcm")

	    def __init__(
	        self,
	        keys: KeysCollection,
	        image_key: str = "image",
	        output_dir: str = "./output",
	        min_branch_points: int = 3,
	        min_length_mm: float = 5.0,
	        smoothing_factor: float = 2.0,
	        allow_missing_keys: bool = False,
	    ) -> None:
	        super().__init__(keys=keys, allow_missing_keys=allow_missing_keys)
	        self.image_key = image_key
	        self.output_dir = output_dir
	        self.min_branch_points = min_branch_points
	        self.min_length_mm = min_length_mm
	        self.smoothing_factor = smoothing_factor

	    @classmethod
	    def _output_stem(cls, img: Any) -> str:
	        """Derive the output file stem from the image's ``filename_or_obj`` metadata.

	        Falls back to ``"unknown"`` when the image is missing or carries no
	        ``meta`` attribute.
	        """
	        if img is None or not hasattr(img, "meta"):
	            return "unknown"
	        stem = Path(str(img.meta.get("filename_or_obj", "unknown"))).stem
	        # Strip only a trailing inner extension; str.replace would also
	        # remove the same text from the middle of the name.
	        for suffix in cls._INNER_SUFFIXES:
	            if stem.endswith(suffix):
	                return stem[: -len(suffix)]
	        return stem

	    def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]:
	        """Write one centerline JSON file per selected key and return a copy of ``data``."""
	        d = dict(data)
	        for key in self.key_iterator(d):
	            pred = d[key]
	            mask_np = _to_numpy(pred)

	            # Get affine from prediction metadata; identity when unavailable.
	            # Going through _to_numpy also handles tensors on any device.
	            affine = np.eye(4)
	            if hasattr(pred, "meta") and "affine" in pred.meta:
	                affine = _to_numpy(pred.meta["affine"]).astype(float)

	            centerlines = extract_centerlines(
	                mask_np, affine,
	                min_branch_points=self.min_branch_points,
	                min_length_mm=self.min_length_mm,
	                smoothing_factor=self.smoothing_factor,
	            )

	            # Derive output filename from image metadata
	            filename = self._output_stem(d.get(self.image_key))

	            out_path = Path(self.output_dir) / f"{filename}_centerline.json"
	            out_path.parent.mkdir(parents=True, exist_ok=True)
	            with open(out_path, "w", encoding="utf-8") as f:
	                json.dump(centerlines, f)

	        return d