Upload folder using huggingface_hub

457db56 verified 3 months ago

6.24 kB

	import argparse
	import os
	import sys
	import numpy as np
	import pandas as pd
	from PIL import Image
	from tqdm import tqdm
	import glob
	from scipy.spatial.distance import directed_hausdorff

	# Add project root to path for custom module import
	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
	from guided_diffusion.custom_lidc_dataset import CustomLIDCDataset

	# --- METRIC IMPLEMENTATIONS ---

	def dice_coefficient(pred, target):
	    """Return the Dice overlap between two masks.

	    Both inputs are cast to boolean with ``astype(bool)``. Two empty masks
	    count as a perfect match (1.0); otherwise the score is
	    ``(2 * |A & B| + eps) / (|A| + |B| + eps)`` with ``eps = 1e-6``.
	    """
	    eps = 1e-6
	    pred_mask = pred.astype(bool)
	    target_mask = target.astype(bool)
	    # Nothing predicted and nothing annotated: treat as full agreement.
	    if not (np.any(pred_mask) or np.any(target_mask)):
	        return 1.0
	    overlap = np.sum(pred_mask & target_mask)
	    total = np.sum(pred_mask) + np.sum(target_mask)
	    return (2. * overlap + eps) / (total + eps)

	def iou_score(pred, target):
	    """Return the intersection-over-union (Jaccard index) of two masks.

	    Both inputs are cast to boolean with ``astype(bool)``. Two empty masks
	    count as a perfect match (1.0); otherwise the score is
	    ``(|A & B| + smooth) / (|A | B| + smooth)`` with ``smooth = 1e-6``.
	    """
	    smooth = 1e-6
	    pred, target = pred.astype(bool), target.astype(bool)
	    # Nothing predicted and nothing annotated: treat as full agreement.
	    if not np.any(pred) and not np.any(target):
	        return 1.0
	    intersection = np.sum(pred & target)
	    # Element-wise OR of the two boolean masks gives the union.
	    union = np.sum(pred | target)
	    return (intersection + smooth) / (union + smooth)

	def hausdorff_distance(pred, target):
	    """Return the symmetric Hausdorff distance between two masks.

	    The non-zero coordinates of each mask (``np.argwhere``) are used as
	    point sets. If either mask has no non-zero element the distance is
	    undefined and ``np.nan`` is returned.
	    """
	    coords_pred = np.argwhere(pred)
	    coords_target = np.argwhere(target)
	    if not (len(coords_pred) and len(coords_target)):
	        return np.nan
	    # The symmetric distance is the larger of the two directed distances.
	    forward = directed_hausdorff(coords_pred, coords_target)[0]
	    backward = directed_hausdorff(coords_target, coords_pred)[0]
	    return max(forward, backward)

	def calculate_combined_sensitivity(samples, gts):
	    """Return the fraction of the merged ground truth covered by the merged samples.

	    All sample masks are OR-ed into one mask, all ground-truth masks into
	    another, and the result is ``|samples & gts| / |gts|``. When the merged
	    ground truth is empty the function returns 1.0.
	    """
	    def merge(masks):
	        # Pixel-wise OR across every mask in the collection.
	        return np.logical_or.reduce([m.astype(bool) for m in masks])
	    sample_union = merge(samples)
	    gt_union = merge(gts)
	    if not np.any(gt_union):
	        return 1.0
	    true_positives = np.sum(sample_union & gt_union)
	    return true_positives / np.sum(gt_union)

	def calculate_d_max(samples, gts):
	    """Return the mean, over ground truths, of the best Dice any sample achieves.

	    For each ground-truth mask the highest ``dice_coefficient`` against the
	    samples is taken (0.0 when ``samples`` is empty); the per-ground-truth
	    maxima are then averaged. Returns 0.0 when ``gts`` is empty.
	    """
	    best_per_gt = []
	    for gt in gts:
	        if samples:
	            best = np.max([dice_coefficient(s, gt) for s in samples])
	        else:
	            best = 0.0
	        best_per_gt.append(best)
	    if not best_per_gt:
	        return 0.0
	    return np.mean(best_per_gt)

	def calculate_diversity_agreement(samples, gts):
	    """Return how closely the spread of the samples matches that of the ground truths.

	    For each collection the minimum and maximum pairwise Dice distance
	    (``1 - dice_coefficient``) are computed; a collection with fewer than
	    two masks contributes ``(0, 0)``. The result is one minus the mean
	    absolute difference between the two minima and the two maxima.
	    """
	    def pairwise_extremes(masks):
	        # Smallest and largest Dice distance over all unordered mask pairs.
	        count = len(masks)
	        if count < 2:
	            return 0, 0
	        distances = []
	        for i in range(count):
	            for j in range(i + 1, count):
	                distances.append(1.0 - dice_coefficient(masks[i], masks[j]))
	        if not distances:
	            return 0, 0
	        return np.min(distances), np.max(distances)
	    gt_low, gt_high = pairwise_extremes(gts)
	    sample_low, sample_high = pairwise_extremes(samples)
	    gap = abs(gt_low - sample_low) + abs(gt_high - sample_high)
	    return 1.0 - gap / 2.0

	def calculate_ci_score(samples, gts):
	    """Return the CI score together with its three components.

	    The components are the combined sensitivity, the maximum Dice match and
	    the diversity agreement; the CI score is
	    ``3 * Sc * Dmax * Da / (Sc + Dmax + Da + 1e-8)``.

	    Returns:
	        A tuple ``(ci, Sc, Dmax, Da)``.
	    """
	    sensitivity = calculate_combined_sensitivity(samples, gts)
	    best_dice = calculate_d_max(samples, gts)
	    agreement = calculate_diversity_agreement(samples, gts)
	    numerator = 3 * sensitivity * best_dice * agreement
	    total = sensitivity + best_dice + agreement
	    # The small constant keeps the division defined when all components are 0.
	    score = numerator / (total + 1e-8)
	    return score, sensitivity, best_dice, agreement

	def calculate_ged(samples, gts):
	    """Return the generalized energy distance between samples and ground truths.

	    The distance between two masks is ``1 - iou_score``. The result is
	    ``2 * E[d(sample, gt)] - E[d(sample, sample')] - E[d(gt, gt')]``, where
	    the within-collection terms average over unordered pairs of distinct
	    masks and are 0 for a collection with at most one mask.
	    """
	    def iou_distance(a, b):
	        return 1.0 - iou_score(a, b)
	    def mean_within(masks):
	        # Mean distance over all unordered pairs of distinct masks.
	        count = len(masks)
	        if count <= 1:
	            return 0
	        pair_distances = [
	            iou_distance(masks[i], masks[j])
	            for i in range(count)
	            for j in range(i + 1, count)
	        ]
	        return np.mean(pair_distances)
	    cross_term = np.mean([iou_distance(s, g) for s in samples for g in gts])
	    sample_term = mean_within(samples)
	    gt_term = mean_within(gts)
	    return 2 * cross_term - sample_term - gt_term

	def load_mask(path):
	    """Load an image file and return it as a boolean mask.

	    The image is converted to 8-bit grayscale (mode ``"L"``) and thresholded:
	    pixels with a value above 127 become ``True``.
	    """
	    with Image.open(path) as handle:
	        grayscale = handle.convert("L")
	        pixels = np.array(grayscale)
	    return pixels > 127

	def main():
	    """Evaluate saved sample masks against the ground-truth annotations.

	    Parses the command-line arguments, builds the test split of
	    ``CustomLIDCDataset``, and for every dataset item loads the PNG masks
	    named ``<image_id>_sample_*.png`` from ``--samples_dir``. Per image it
	    computes the CI score and its components, the GED, and the average
	    Dice, IoU and Hausdorff distance over all sample/ground-truth pairs.
	    The per-image rows plus a final ``AVERAGE`` row are written to
	    ``--results_file`` as CSV and the averages are printed.

	    Images without any matching sample file are skipped with a warning.
	    """
	    parser = argparse.ArgumentParser(description="Evaluate ambiguous segmentation samples.")
	    parser.add_argument("--samples_dir", type=str, required=True, help="Directory with saved sample masks.")
	    parser.add_argument("--gt_data_dir", type=str, required=True, help="Path to the root of the LIDC data directory.")
	    parser.add_argument("--dataset_type", type=str, default="lidc", choices=["lidc", "multiannotator"],
	                        help="Ground-truth dataset layout.")
	    parser.add_argument("--split_strategy", type=str, default="all_annotations",
	                        help="Split strategy for multiannotator dataset.")
	    parser.add_argument("--image_size", type=int, default=128, help="Image size used during training/sampling.")
	    parser.add_argument("--results_file", type=str, default="evaluation_results.csv", help="Path to save the output CSV file.")
	    args = parser.parse_args()

	    gt_dataset = CustomLIDCDataset(
	        data_root=args.gt_data_dir,
	        split="test",
	        image_size=args.image_size,
	        dataset_type=args.dataset_type,
	        split_strategy=args.split_strategy,
	    )
	    all_results = []

	    # Escape glob metacharacters so a directory or image id that contains
	    # "[", "]", "*" or "?" is matched literally instead of being read as a
	    # pattern (which would silently report "no samples found").
	    samples_dir_literal = glob.escape(args.samples_dir)

	    print(f"Evaluating {len(gt_dataset)} images...")
	    # Index-based access: the dataset is only assumed to provide
	    # __len__ and __getitem__.
	    for i in tqdm(range(len(gt_dataset))):
	        # Each item is unpacked as (image, ground truths, id); the image is unused.
	        _, gts_tensor, image_id = gt_dataset[i]
	        image_id = str(image_id)

	        pattern = os.path.join(samples_dir_literal, f"{glob.escape(image_id)}_sample_*.png")
	        sample_paths = sorted(glob.glob(pattern))
	        if not sample_paths:
	            print(f"Warning: No samples found for {image_id}. Skipping.")
	            continue

	        samples = [load_mask(p) for p in sample_paths]
	        # NOTE(review): assumes iterating gts_tensor yields one tensor per
	        # annotation, each convertible with .numpy() - confirm against the dataset.
	        gts = [gt.numpy() for gt in gts_tensor]

	        ci_score, sc, dmax, da = calculate_ci_score(samples, gts)
	        ged = calculate_ged(samples, gts)

	        all_dice = [dice_coefficient(s, g) for s in samples for g in gts]
	        all_iou = [iou_score(s, g) for s in samples for g in gts]
	        # Pairs with an empty mask yield NaN and are left out of the average.
	        pair_hd = [hausdorff_distance(s, g) for s in samples for g in gts]
	        all_hd = [d for d in pair_hd if not np.isnan(d)]

	        all_results.append({
	            "image_id": image_id, "CI_Score": ci_score, "Combined_Sensitivity": sc,
	            "D_max": dmax, "Diversity_Agreement": da, "GED": ged,
	            "Avg_Dice": np.mean(all_dice) if all_dice else 0,
	            "Avg_IoU": np.mean(all_iou) if all_iou else 0,
	            "Avg_Hausdorff": np.mean(all_hd) if all_hd else np.nan,
	        })

	    if not all_results:
	        print("No results generated. Check paths and filenames.")
	        return

	    df = pd.DataFrame(all_results)
	    # Column-wise mean of the numeric metrics, appended as a final summary row.
	    avg_row = df.mean(numeric_only=True).to_frame().T
	    avg_row['image_id'] = 'AVERAGE'
	    df_final = pd.concat([df, avg_row], ignore_index=True)

	    df_final.to_csv(args.results_file, index=False, float_format='%.4f')
	    print(f"\nEvaluation complete. Results saved to {args.results_file}")
	    print("\n--- Averages ---")
	    print(avg_row.to_string(index=False, float_format='%.4f'))

	# Run the evaluation only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()