Spaces:

felipekitamura
/

face_deid_ct

Sleeping

App Files Files Community

face_deid_ct / face_deid_ct.py

felipekitamura

Update face_deid_ct.py

873bd94 verified 8 months ago

raw

history blame contribute delete

10.5 kB

	import os
	import pydicom
	import numpy as np
	import cv2
	from matplotlib import pyplot as plt
	import random
	import time
	import tqdm
	from IPython.core.display import display, HTML

	# Pick the tqdm flavour that matches the running front end.
	# get_ipython is only defined inside IPython/Jupyter sessions, so a NameError
	# here means the code is running in a plain interpreter.
	try:
	    _in_notebook = get_ipython().__class__.__name__ == 'ZMQInteractiveShell'
	except NameError:
	    _in_notebook = False

	if _in_notebook:
	    try:
	        # We are in Jupyter, use the widget-based progress bar
	        from tqdm.notebook import tqdm
	    except ImportError:
	        # Notebook front end unavailable, fall back to the text progress bar
	        from tqdm import tqdm
	else:
	    # We are in a terminal (or a non-notebook IPython shell), use standard tqdm
	    from tqdm import tqdm


	# Bounds (exclusive on both sides) applied by apply_mask_and_get_values when
	# it collects the candidate replacement values from the masked HU volume.
	FACE_MAX_VALUE = 50
	FACE_MIN_VALUE = -125

	# Default threshold of binarize_volume: voxels at or below this value are flagged as air.
	AIR_THRESHOLD = -800
	# Default side length of the elliptical structuring element built by dilate_volume.
	KERNEL_SIZE = 35



	def is_dicom(file_path):
	    """Return True if *file_path* can be parsed by pydicom, otherwise False.

	    Only the header is read (``stop_before_pixels=True``), so probing a large
	    image does not load its pixel data. Any exception raised while reading
	    is treated as "not a DICOM file".
	    """
	    try:
	        pydicom.dcmread(file_path, stop_before_pixels=True)
	    except Exception:
	        # Deliberately broad: this is a best-effort probe, not a validator
	        return False
	    return True

	def get_first_directory(path):
	    """Return the last component of *path* (despite the function's name).

	    Both ``\\`` and ``/`` are accepted as separators. Trailing separators are
	    ignored, so ``"a/b/"`` yields ``"b"`` instead of an empty string.

	    Parameters:
	    path (str): A directory path using Windows or Unix separators.

	    Returns:
	    str: The final path component, or ``""`` when the path has none
	    (for example an empty string or a bare separator).
	    """
	    # Normalize the path to always use Unix-style path separators
	    normalized_path = path.replace("\\", "/").rstrip("/")
	    return normalized_path.rsplit("/", 1)[-1]

	def list_dicom_directories(root_dir):
	    """Collect every directory under *root_dir* that holds at least one DICOM file.

	    The tree is walked with os.walk and each file is probed with is_dicom;
	    probing of a directory stops at its first DICOM file.

	    Parameters:
	    root_dir (str): Directory at which the recursive search starts.

	    Returns:
	    list: The matching directory paths, in no particular order.
	    """
	    found = set()

	    for current_dir, _subdirs, filenames in os.walk(root_dir):
	        candidates = (os.path.join(current_dir, name) for name in filenames)
	        # any() short-circuits, so the remaining files are not opened
	        if any(is_dicom(candidate) for candidate in candidates):
	            found.add(current_dir)

	    return list(found)

	def load_scan(path):
	    """Read the DICOM files found directly in *path*, sorted by z position.

	    Sub-directories and files that pydicom rejects as non-DICOM are skipped.
	    The slices are ordered by the third component of ImagePositionPatient.
	    Every returned dataset gets its SliceThickness overwritten with the
	    spacing between the first two sorted slices; the spacing falls back to
	    SliceLocation and finally to 1.0 when it cannot be computed (for
	    example when fewer than two slices were read).

	    Parameters:
	    path (str): Directory containing the DICOM files of one series.

	    Returns:
	    list: The pydicom datasets, sorted by ascending z position.
	    """
	    slices = []
	    for name in os.listdir(path):
	        file_path = os.path.join(path, name)
	        if not os.path.isfile(file_path):
	            continue
	        try:
	            slices.append(pydicom.dcmread(file_path))
	        except pydicom.errors.InvalidDicomError:
	            # Stray non-DICOM file sitting next to the images
	            continue
	    slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]))

	    try:
	        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
	    except (AttributeError, IndexError, TypeError):
	        try:
	            slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
	        except (AttributeError, IndexError, TypeError):
	            slice_thickness = 1.0

	    for s in slices:
	        s.SliceThickness = slice_thickness

	    return slices

	def get_pixels_hu(slices):
	    """Stack the slices' pixel arrays and rescale them to Hounsfield units.

	    Parameters:
	    slices (list): Datasets exposing pixel_array, RescaleSlope and
	    RescaleIntercept.

	    Returns:
	    numpy.ndarray: An int16 volume with one entry per slice along axis 0.
	    """
	    # Work in int16 from the start; the rescaled values are stored as int16 too
	    volume = np.stack([ds.pixel_array for ds in slices]).astype(np.int16)

	    # Pixels flagged with -2000 (outside of the scan) are reset to 0 before rescaling
	    volume[volume == -2000] = 0

	    # Apply slope and intercept slice by slice
	    for index, ds in enumerate(slices):
	        intercept = ds.RescaleIntercept
	        slope = ds.RescaleSlope

	        if slope != 1:
	            # The float product is cast back to int16 when stored in the volume
	            volume[index] = slope * volume[index].astype(np.float64)

	        volume[index] += np.int16(intercept)

	    return np.array(volume, dtype=np.int16)

	def binarize_volume(volume, air_hu=AIR_THRESHOLD):
	    """Return a uint8 mask that is 1 where *volume* is at or below *air_hu*.

	    Parameters:
	    volume (numpy.ndarray): Volume to threshold.
	    air_hu (int, optional): Threshold value. Defaults to AIR_THRESHOLD.

	    Returns:
	    numpy.ndarray: uint8 array shaped like *volume*, holding 0 or 1.
	    """
	    air_mask = volume <= air_hu
	    return air_mask.astype(np.uint8)

	def largest_connected_component(binary_image):
	    """Keep only the largest 8-connected foreground component of a 2D mask.

	    Parameters:
	    binary_image (numpy.ndarray): Single-channel 8-bit binary image.

	    Returns:
	    numpy.ndarray: uint8 image of the same shape, 1 inside the largest
	    component and 0 elsewhere. An all-zero image is returned when the input
	    has no foreground component.
	    """
	    # Find all connected components and stats
	    num_labels, labels, stats, _centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8)

	    largest_component_image = np.zeros(labels.shape, dtype=np.uint8)

	    # Label 0 is the background. With no other label, stats[1:] is empty and
	    # np.argmax would raise, so return the empty mask instead.
	    if num_labels <= 1:
	        return largest_component_image

	    # Index of the largest component, ignoring the background row
	    largest_component_index = np.argmax(stats[1:, cv2.CC_STAT_AREA]) + 1
	    largest_component_image[labels == largest_component_index] = 1

	    return largest_component_image

	def get_largest_component_volume(volume):
	    """Apply largest_connected_component to every slice along axis 0.

	    Parameters:
	    volume (numpy.ndarray): Stack of binary slices.

	    Returns:
	    numpy.ndarray: uint8 volume shaped like *volume* holding the per-slice
	    results.
	    """
	    # Pre-allocate the output; every slice is overwritten below
	    result = np.empty_like(volume, dtype=np.uint8)

	    for index, plane in enumerate(volume):
	        result[index] = largest_connected_component(plane)

	    return result



	def dilate_volume(volume, kernel_size=KERNEL_SIZE):
	    """Dilate every slice of *volume* with an elliptical structuring element.

	    Parameters:
	    volume (numpy.ndarray): Stack of mask slices; each slice is cast to
	    uint8 before dilation.
	    kernel_size (int, optional): Width and height of the structuring
	    element. Defaults to KERNEL_SIZE.

	    Returns:
	    numpy.ndarray: Array shaped and typed like *volume* with the dilated
	    slices.
	    """
	    # One structuring element is shared by all slices
	    structuring_element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))

	    # Pre-allocate the output; every slice is overwritten below
	    result = np.empty_like(volume)

	    for index, plane in enumerate(volume):
	        result[index] = cv2.dilate(plane.astype(np.uint8), structuring_element)

	    return result


	def apply_mask_and_get_values(image_volume, mask_volume):
	    """List the distinct masked values lying strictly inside the face window.

	    The image is multiplied element-wise by the mask, so positions where the
	    mask is 0 contribute the value 0. That 0 is kept in the result whenever
	    it lies between FACE_MIN_VALUE and FACE_MAX_VALUE.

	    Parameters:
	    image_volume (numpy.ndarray): Image values.
	    mask_volume (numpy.ndarray): Mask multiplied with the image.

	    Returns:
	    list: Sorted unique values v with FACE_MIN_VALUE < v < FACE_MAX_VALUE.
	    """
	    masked = image_volume * mask_volume

	    # np.unique returns the distinct values in ascending order
	    candidates = np.unique(masked)
	    inside_window = (candidates > FACE_MIN_VALUE) & (candidates < FACE_MAX_VALUE)

	    return candidates[inside_window].tolist()


	def apply_random_values_optimized(pixels_hu, dilated_volume, unique_values_list):
	    """Return a copy of *pixels_hu* with the masked voxels randomly refilled.

	    Parameters:
	    pixels_hu (numpy.ndarray): Source volume; it is not modified.
	    dilated_volume (numpy.ndarray): Mask; positions equal to 1 are replaced.
	    The number of draws is the sum of the mask, so it is expected to hold
	    only 0 and 1.
	    unique_values_list (list): Values to draw from, uniformly and with
	    replacement.

	    Returns:
	    numpy.ndarray: The new volume.
	    """
	    candidates = np.array(unique_values_list)

	    # One random index into the candidate list per masked voxel
	    n_masked = np.sum(dilated_volume)
	    picks = np.random.choice(len(unique_values_list), size=n_masked)

	    result = np.copy(pixels_hu)
	    result[dilated_volume == 1] = candidates[picks]

	    return result

	def save_new_dicom_files(new_volume, original_dir, out_path, app="_d"):
	    """Write *new_volume* back to disk, one DICOM file per slice.

	    Each output file is the matching original dataset with its PixelData
	    replaced. The original files are selected and ordered the same way the
	    volume was built by load_scan: every readable DICOM file in
	    *original_dir*, sorted by the third component of ImagePositionPatient.
	    This keeps slice ``i`` of the volume paired with its own header.

	    Parameters:
	    new_volume (numpy.ndarray): Volume in rescaled units, slices on axis 0.
	    original_dir (str): Directory holding the original DICOM files.
	    out_path (str or None): Output directory. When None, ``original_dir + app``
	    is used.
	    app (str, optional): Suffix for the default output directory.

	    Raises:
	    ValueError: If the number of DICOM files found differs from the number
	    of slices in *new_volume*.
	    """
	    new_dir = original_dir + app if out_path is None else out_path

	    # exist_ok avoids the check-then-create race of a separate exists() test
	    os.makedirs(new_dir, exist_ok=True)

	    # Collect (z position, path) pairs from the headers only; the pixel data
	    # is read later, one file at a time.
	    keyed_files = []
	    for name in os.listdir(original_dir):
	        file_path = os.path.join(original_dir, name)
	        if not os.path.isfile(file_path):
	            continue
	        try:
	            header = pydicom.dcmread(file_path, stop_before_pixels=True)
	        except pydicom.errors.InvalidDicomError:
	            continue
	        keyed_files.append((float(header.ImagePositionPatient[2]), file_path))

	    # Sort on the z position only, so ties keep the directory listing order
	    keyed_files.sort(key=lambda item: item[0])

	    if len(keyed_files) != new_volume.shape[0]:
	        raise ValueError(
	            f"Found {len(keyed_files)} DICOM files in {original_dir} "
	            f"but the volume has {new_volume.shape[0]} slices."
	        )

	    for i, (_z, dicom_file) in enumerate(keyed_files):
	        ds = pydicom.dcmread(dicom_file)
	        ds.decompress()

	        # Revert the slope and intercept operation on the slice
	        new_slice = (new_volume[i] - ds.RescaleIntercept) / ds.RescaleSlope

	        # Round before the integer cast; plain truncation can be off by one
	        # when the division is not exact.
	        ds.PixelData = np.rint(new_slice).astype(np.int16).tobytes()

	        ds.save_as(os.path.join(new_dir, f"new_image_{i}.dcm"))



	def drown_volume(in_path, out_path='deid_ct', replacer='face'):
	    """
	    De-identify every DICOM series found under a directory.

	    For each directory containing DICOM files, the volume is converted to
	    Hounsfield units, the air is binarized, the largest connected air
	    component of each slice is kept and dilated, and the voxels covered by
	    the dilated mask are overwritten. The result is saved as new DICOM files.

	    Parameters:
	    in_path (str): The path to the directory containing the input DICOM files.
	    out_path (str or None, optional): The directory under which one
	    sub-directory per processed series is created. Default is 'deid_ct'.
	    If None, each series is saved next to its input directory, in a
	    directory with the same name appended by "_d".
	    replacer (str or int, optional): Indicates what the masked voxels are
	    replaced with. Default is 'face'.
	    'face': random values sampled from the band between the air mask and
	    its dilation, limited to the face HU window.
	    'air': -1000 HU.
	    int: that value, in HU.
	    Any other value falls back to 'face' after printing a message.

	    Returns:
	    None. The function saves new DICOM files and prints the total elapsed time of the operation.
	    """
	    start_time = time.time()

	    dirs = list_dicom_directories(in_path)

	    for _d in tqdm(dirs, desc="List of studies"):

	        with tqdm(total=8, desc="Processing DICOM Files", leave=False) as pbar:
	            # Load the DICOM files
	            slices = load_scan(_d)
	            pbar.update()

	            # Get the pixel values and convert them to Hounsfield Units (HU)
	            pixels_hu = get_pixels_hu(slices)
	            pbar.update()

	            # Apply the binarization function on the HU volume
	            binarized_volume = binarize_volume(pixels_hu)
	            pbar.update()

	            # Get the largest connected component from the binarized volume
	            processed_volume = get_largest_component_volume(binarized_volume)
	            pbar.update()

	            # Dilate the processed volume
	            dilated_volume = dilate_volume(processed_volume)
	            pbar.update()

	            if replacer == 'face':
	                # Sample values from the band added by the dilation
	                unique_values_list = apply_mask_and_get_values(pixels_hu, dilated_volume - processed_volume)
	            elif replacer == 'air':
	                # The volume is in HU at this point, so air is -1000 (0 would be water)
	                unique_values_list = [-1000]
	            else:
	                try:
	                    unique_values_list = [int(replacer)]
	                except (TypeError, ValueError):
	                    print('replacer must be either air, face, or an integer number in Hounsfield units, but ' + str(replacer) + ' was provided.')
	                    print('replacing with face')
	                    unique_values_list = apply_mask_and_get_values(pixels_hu, dilated_volume - processed_volume)

	            pbar.update()

	            # Apply random values to the dilated volume based on the unique values list
	            new_volume = apply_random_values_optimized(pixels_hu, dilated_volume, unique_values_list)
	            pbar.update()

	            # Save the new DICOM files. With out_path=None the choice of
	            # directory is left to save_new_dicom_files (input directory + "_d").
	            if out_path is None:
	                out_path_n = None
	            else:
	                out_path_n = os.path.join(out_path, get_first_directory(_d))
	            save_new_dicom_files(new_volume, _d, out_path_n)
	            pbar.update()

	    elapsed_time = time.time() - start_time
	    print(f"Total elapsed time: {elapsed_time} seconds")