Spaces:

chocodo
/

dental-segmentation

Sleeping

App Files Files Community

dental-segmentation / utils /preprocessing.py

chocodo

Initial commit: torchvision Mask R-CNN

97229a3 18 days ago

Raw

History Blame Contribute Delete

9.55 kB

	"""
	Preprocessing
	=======================
	Image preprocessing pipeline for dental panoramic X-ray images.

	"""

	import os
	import json
	import numpy as np
	import cv2
	import skimage.io
	import skimage.draw
	import random
	import copy
	from pathlib import Path
	from typing import Tuple, List, Dict, Optional


	# DICOM loading (only used when the input file is a .dcm)

	def load_dicom(image_path):
	    """
	    Load a DICOM file and return its pixel data as a uint8 array.

	    Pixel values are min-max rescaled to the 0-255 range. An image with
	    no dynamic range (every pixel equal) is returned as all zeros instead
	    of letting the raw values wrap around in the uint8 cast. A 2-D array,
	    or a 3-D array with a single trailing channel, is replicated to three
	    channels; any other layout keeps its shape.

	    Args:
	        image_path: Path of the DICOM file, handed to pydicom.dcmread.

	    Returns:
	        uint8 numpy array with the rescaled pixel data.

	    Raises:
	        ImportError: If the optional pydicom dependency is not installed.
	    """
	    # pydicom is imported lazily so the module works without it for
	    # non-DICOM inputs.
	    try:
	        import pydicom
	    except ImportError as err:
	        raise ImportError(
	            "requires pydicom library. "
	            "Install it using: pip install pydicom"
	        ) from err

	    dicom_img = pydicom.dcmread(image_path)
	    array = dicom_img.pixel_array.astype(np.float32)
	    a_min, a_max = array.min(), array.max()
	    if a_max > a_min:
	        array = (array - a_min) / (a_max - a_min) * 255.0
	    else:
	        # Flat image: there is nothing to stretch, and casting the raw
	        # values (often > 255) to uint8 would wrap them arbitrarily.
	        array = np.zeros_like(array)
	    array = array.astype(np.uint8)

	    if array.ndim == 2:
	        array = np.stack([array] * 3, axis=-1)
	    elif array.ndim == 3 and array.shape[-1] == 1:
	        array = np.concatenate([array] * 3, axis=-1)

	    return array


	# Image loading

	def _rescale_to_uint8(image):
	    """Min-max rescale an array of any numeric dtype to uint8 (0-255)."""
	    scaled = image.astype(np.float32)
	    lo, hi = scaled.min(), scaled.max()
	    if hi > lo:
	        scaled = (scaled - lo) / (hi - lo) * 255.0
	    else:
	        # No dynamic range: return zeros rather than wrapped raw values.
	        scaled = np.zeros_like(scaled)
	    return scaled.astype(np.uint8)


	def load_image(path):
	    """
	    Load an image file and return it as a 3-channel uint8 array.

	    Files with a ``.dcm`` suffix are delegated to load_dicom; everything
	    else is read with skimage.io.imread. After reading:

	    * a 2-D (grayscale) array is replicated to three channels,
	    * a 4-channel array keeps only its first three channels (alpha dropped),
	    * a single trailing channel is replicated to three channels,
	    * data that is not already uint8 (e.g. 16-bit or float images) is
	      min-max rescaled to 0-255 instead of being cast directly, because a
	      plain cast wraps 16-bit values and zeroes out floats in [0, 1].

	    Args:
	        path: Path of the image file.

	    Returns:
	        uint8 numpy array.
	    """
	    suffix = Path(path).suffix.lower()
	    if suffix == ".dcm":
	        return load_dicom(path)

	    image = skimage.io.imread(path)
	    if image.ndim == 2:
	        image = np.stack([image] * 3, axis=-1)
	    elif image.ndim == 3 and image.shape[-1] == 4:
	        image = image[:, :, :3]
	    elif image.ndim == 3 and image.shape[-1] == 1:
	        image = np.concatenate([image] * 3, axis=-1)

	    # Rescale after dropping alpha so the alpha plane cannot skew min/max.
	    if image.dtype != np.uint8:
	        image = _rescale_to_uint8(image)
	    return image


	# Contrast enhancement

	def enhance_contrast(image, method = "clahe"):
	    """
	    Return a contrast-enhanced copy of an RGB uint8 image.

	    Args:
	        image: RGB uint8 image.
	        method: 'clahe' (default) equalises only the L channel after an
	            RGB -> LAB conversion, using clipLimit=2.0 and an 8x8 tile
	            grid. 'histogram_eq' applies global histogram equalisation
	            to each of the three channels independently.

	    Returns:
	        The enhanced image.

	    Raises:
	        ValueError: If method is neither 'clahe' nor 'histogram_eq'.
	    """
	    if method not in ("clahe", "histogram_eq"):
	        raise ValueError(f"Unknown method '{method}'. Use 'clahe' or 'histogram_eq'.")

	    if method == "histogram_eq":
	        equalized = []
	        for idx in range(3):
	            equalized.append(cv2.equalizeHist(image[:, :, idx]))
	        return np.stack(equalized, axis=-1)

	    # CLAHE branch: work in LAB so only lightness is touched.
	    lab_image = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
	    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
	    lightness = lab_image[:, :, 0]
	    lab_image[:, :, 0] = equalizer.apply(lightness)
	    return cv2.cvtColor(lab_image, cv2.COLOR_LAB2RGB)


	# Resize

	def resize_image(image, min_dim = 512, max_dim = 1024):
	    """
	    Resize preserving aspect ratio, then zero-pad each side to a multiple of 64.

	    The short side is scaled up to at least ``min_dim`` (never scaled
	    down for that purpose); if the long side would then exceed
	    ``max_dim``, the scale is reduced so the long side equals ``max_dim``.
	    Padding is split as evenly as possible between the two sides of each
	    axis. Both 2-D (H, W) and 3-D (H, W, C) arrays are accepted.

	    Args:
	        image: Image array whose first two axes are height and width.
	        min_dim: Minimum length targeted for the shorter side.
	        max_dim: Maximum length allowed for the longer side.

	    Returns:
	        (resized_image, scale, (top, bottom, left, right) padding size)
	    """
	    h, w = image.shape[:2]
	    scale = max(min_dim / min(h, w), 1.0)
	    if round(max(h, w) * scale) > max_dim:
	        scale = max_dim / max(h, w)

	    new_h, new_w = round(h * scale), round(w * scale)
	    # Skip the interpolation pass when the size does not change.
	    if (new_h, new_w) != (h, w):
	        image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

	    # The backbone repeatedly halves the spatial dimensions, so pad up to
	    # the nearest multiple of 64 to avoid size mismatches.
	    pad_h = (64 - new_h % 64) % 64
	    pad_w = (64 - new_w % 64) % 64
	    top, bottom = pad_h // 2, pad_h - pad_h // 2  # split padding evenly
	    left, right = pad_w // 2, pad_w - pad_w // 2

	    # Build the pad spec from the array's actual rank: grayscale inputs
	    # are 2-D, and cv2.resize drops a trailing singleton channel axis.
	    pad_spec = ((top, bottom), (left, right)) + ((0, 0),) * (image.ndim - 2)
	    image = np.pad(image, pad_spec, mode="constant")
	    return image, scale, (top, bottom, left, right)


	def normalize_image(image):
	    """Return the image as float32 with every value divided by 255."""
	    as_float = image.astype(np.float32)
	    return np.divide(as_float, 255.0)


	# COCO annotation parsing

	def load_coco_annotations(json_path):
	    """
	    Load a COCO-format annotation file.

	    The file is read as UTF-8 explicitly so the result does not depend on
	    the platform's default text encoding.

	    Args:
	        json_path: Path of the JSON annotation file.

	    Returns:
	        The parsed JSON content (for a COCO file, a dict with keys such
	        as info, images, annotations, categories).
	    """
	    with open(json_path, encoding="utf-8") as f:
	        return json.load(f)


	def get_image_annotations(coco, filename):
	    """
	    Collect every annotation that belongs to the image named ``filename``.

	    Args:
	        coco: Loaded COCO dict; its "images" and "annotations" lists are read.
	        filename: Image filename, e.g. '001.jpg'.

	    Returns:
	        List of annotation dicts whose "image_id" matches the image's id,
	        or an empty list when no image has that filename.
	    """
	    ids_by_name = {}
	    for entry in coco["images"]:
	        ids_by_name[entry["file_name"]] = entry["id"]

	    try:
	        wanted_id = ids_by_name[filename]
	    except KeyError:
	        return []

	    matches = []
	    for annotation in coco["annotations"]:
	        if annotation["image_id"] == wanted_id:
	            matches.append(annotation)
	    return matches


	def coco_seg_to_mask(segmentation, height, width):
	    """
	    Rasterise a COCO polygon segmentation into a single binary mask.

	    Args:
	        segmentation: The whole ``ann['segmentation']`` list of one
	            annotation, i.e. a list of flat [x1, y1, x2, y2, ...]
	            polygons. Pass the full list, not ``segmentation[0]``: every
	            polygon in it is drawn into the same mask.
	        height, width: Image dimensions in pixels.

	    Returns:
	        Boolean mask of shape (height, width) covering all polygons of
	        this one annotation. Call once per annotation to get every mask.
	    """
	    mask = np.zeros((height, width), dtype=bool)
	    for poly in segmentation:
	        xs = np.asarray(poly[0::2])
	        ys = np.asarray(poly[1::2])
	        # Let skimage discard pixels outside the image. Clamping the
	        # coordinates afterwards would instead pile them onto the border
	        # row/column and paint pixels the polygon never covered.
	        rr, cc = skimage.draw.polygon(ys, xs, shape=(height, width))
	        mask[rr, cc] = True
	    return mask


	def build_masks(coco, image_filename, height, width):
	    """
	    Build the (H, W, N) boolean mask stack for all annotations of one image.

	    Annotations with a missing or empty "segmentation" entry are skipped.
	    The function always returns a 2-tuple, including when the image has
	    no usable annotations, so callers can unpack the result
	    unconditionally.

	    Args:
	        coco: Loaded COCO annotation dict.
	        image_filename: Image filename, e.g. '001.jpg'.
	        height: Image height in pixels.
	        width: Image width in pixels.

	    Returns:
	        (masks, class_ids) where masks is a boolean array of shape
	        (H, W, N) and class_ids is the list of the N matching
	        ``category_id`` values. With no usable annotations the result is
	        an (H, W, 0) array and an empty list.
	    """
	    masks = []
	    class_ids = []
	    for ann in get_image_annotations(coco, image_filename):
	        seg = ann.get("segmentation", [])
	        if not seg:
	            continue
	        masks.append(coco_seg_to_mask(seg, height, width))
	        class_ids.append(ann["category_id"])

	    if not masks:
	        # Covers both "no annotations" and "annotations without polygons".
	        return np.zeros((height, width, 0), dtype=bool), []

	    return np.stack(masks, axis=-1), class_ids  # -> (H, W, N)


	def count_teeth_per_image(coco):
	    """
	    Count annotations per image filename.

	    Args:
	        coco: Loaded COCO annotation dict; its "images" and
	            "annotations" lists are read.

	    Returns:
	        Dict mapping filename -> number of annotations. Annotations whose
	        image_id matches no image are counted under the key "unknown".
	    """
	    names_by_id = {}
	    for image in coco["images"]:
	        names_by_id[image["id"]] = image["file_name"]

	    tally = {}
	    for annotation in coco["annotations"]:
	        image_id = annotation["image_id"]
	        if image_id in names_by_id:
	            key = names_by_id[image_id]
	        else:
	            key = "unknown"
	        tally.setdefault(key, 0)
	        tally[key] += 1
	    return tally


	def class_frequency(coco):
	    """
	    Count how many annotations carry each category_id.

	    Args:
	        coco: Loaded COCO annotation dict; its "annotations" list is read.

	    Returns:
	        Dict mapping category_id -> number of annotations with that id.
	    """
	    tally = {}
	    for annotation in coco["annotations"]:
	        category = annotation["category_id"]
	        if category in tally:
	            tally[category] += 1
	        else:
	            tally[category] = 1
	    return tally


	# Dataset split utility

	def split_coco_dataset(coco, train_ratio = 0.8, seed = 42):
	    """
	    Split a COCO dataset dict into train/val subsets by image.

	    Images are shuffled with a private random generator seeded with
	    ``seed``, so the split is reproducible without reseeding the global
	    ``random`` module state. The first ``int(len(images) * train_ratio)``
	    shuffled images form the training set; the rest form the validation
	    set. Each annotation follows its image. All other top-level keys of
	    ``coco`` are carried over unchanged into both subsets.

	    Args:
	        coco: Full COCO dataset dict.
	        train_ratio: Fraction of images assigned to training.
	        seed: Seed for the shuffle.

	    Returns:
	        (train_coco, val_coco) - tuple of two dicts with the same
	        top-level keys as ``coco``.
	    """
	    rng = random.Random(seed)
	    images = list(coco["images"])  # copy so the caller's list keeps its order
	    rng.shuffle(images)

	    n_train = int(len(images) * train_ratio)
	    train_imgs, val_imgs = images[:n_train], images[n_train:]

	    def subset(imgs):
	        ids = {img["id"] for img in imgs}
	        return {
	            **coco,
	            "images": imgs,
	            "annotations": [a for a in coco["annotations"] if a["image_id"] in ids],
	        }

	    return subset(train_imgs), subset(val_imgs)


	# Batch preprocessing

	def preprocess_directory(input_dir,output_dir,
	                         contrast_method = "clahe",
	                         min_dim = 512,
	                         max_dim = 1024):
	    """
	    Contrast-enhance every supported image in a directory and save it as JPEG.

	    Files directly inside ``input_dir`` with one of the suffixes .jpg,
	    .jpeg, .png, .tif, .tiff, .bmp or .dcm are loaded, enhanced, and
	    written to ``output_dir`` as ``<stem>.jpg`` at JPEG quality 95. Files
	    are processed in sorted order. Two inputs sharing a stem (e.g.
	    ``a.png`` and ``a.tif``) map to the same output name, so the later
	    one overwrites the earlier one.

	    Args:
	        input_dir: Directory containing the source images.
	        output_dir: Destination directory; created if missing.
	        contrast_method: Forwarded to enhance_contrast as ``method``.
	        min_dim: Currently unused - the resize step is disabled.
	        max_dim: Currently unused - the resize step is disabled.
	    """
	    os.makedirs(output_dir, exist_ok=True)
	    exts = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp", ".dcm"}
	    # Sort for a reproducible order and skip directories whose name
	    # happens to end in an image suffix.
	    paths = sorted(
	        p for p in Path(input_dir).iterdir()
	        if p.is_file() and p.suffix.lower() in exts
	    )
	    print(f"Preprocessing {len(paths)} images → {output_dir}")

	    for p in paths:
	        img = load_image(str(p))
	        img = enhance_contrast(img, method=contrast_method)
	        # NOTE: resizing is intentionally left disabled here, as in the
	        # original code:
	        # img, _, _ = resize_image(img, min_dim=min_dim, max_dim=max_dim)

	        # Always written as JPEG regardless of the source format.
	        out_path = os.path.join(output_dir, p.stem + ".jpg")
	        written = cv2.imwrite(
	            out_path,
	            cv2.cvtColor(img, cv2.COLOR_RGB2BGR),
	            [cv2.IMWRITE_JPEG_QUALITY, 95],
	        )
	        if not written:
	            # cv2.imwrite signals failure via its return value, not an
	            # exception; report it instead of dropping the file silently.
	            print(f"Warning: failed to write {out_path}")

	    print("Done.")