Spaces:

78anand
/

KasaHealth

Running

KasaHealth / utils /check_overlap.py

Upload folder using huggingface_hub

f317798 verified about 2 months ago

1.22 kB

	import numpy as np
	import os

	# Default data directory; override by passing a path as the first
	# command-line argument or by calling main(data_dir=...).
	base = r"c:\Users\ASUS\lung_ai_project\data"


	def first_sample_with_label(features, labels, label="sick"):
	    """Return the first row of *features* whose label equals *label*.

	    Args:
	        features: Array indexed by sample along its first axis.
	        labels: Array of labels, compared element-wise against *label*.
	        label: Label value to look for.

	    Returns:
	        ``features[i]`` for the smallest index ``i`` whose label matches,
	        or ``None`` when no label matches (including the case where the
	        label dtype cannot equal *label* at all).
	    """
	    # flatnonzero also copes with a scalar comparison result, which some
	    # NumPy versions produce when the dtypes are not comparable.
	    hits = np.flatnonzero(np.asarray(labels) == label)
	    if hits.size == 0:
	        return None
	    return features[hits[0]]


	def rows_close_to(rows, sample, atol=1e-5):
	    """Return a boolean mask of the entries of *rows* that match *sample*.

	    An entry matches when every element is within ``np.isclose`` tolerance
	    of the corresponding element of *sample* (the given *atol* plus
	    NumPy's default relative tolerance), so a "match" is a near-match,
	    not necessarily bit-for-bit equality.

	    Args:
	        rows: Array of at least two dimensions, one sample per first-axis
	            entry.
	        sample: Array whose shape equals ``rows.shape[1:]``.
	        atol: Absolute tolerance forwarded to ``np.isclose``.

	    Returns:
	        Boolean array of length ``rows.shape[0]``.

	    Raises:
	        ValueError: If *rows* has fewer than two dimensions or *sample*
	            does not have the per-sample shape of *rows*.
	    """
	    rows = np.asarray(rows)
	    sample = np.asarray(sample)
	    if rows.ndim < 2 or sample.shape != rows.shape[1:]:
	        raise ValueError(
	            f"sample shape {sample.shape} is incompatible with "
	            f"rows of shape {rows.shape}"
	        )
	    close = np.isclose(rows, sample, atol=atol)
	    # Reduce over every per-sample axis so the mask has one entry per row.
	    return close.all(axis=tuple(range(1, close.ndim)))


	def has_duplicate_rows(rows):
	    """Return True when *rows* contains two exactly equal first-axis entries."""
	    rows = np.asarray(rows)
	    if rows.shape[0] < 2:
	        # Fewer than two samples cannot contain a duplicate.
	        return False
	    _, counts = np.unique(rows, axis=0, return_counts=True)
	    return bool(np.any(counts > 1))


	def main(data_dir=base):
	    """Report overlap between the original and augmented embedding sets.

	    Loads the four ``.npy`` files under *data_dir*, prints both feature
	    shapes, checks whether the first original sample labelled ``'sick'``
	    appears (within tolerance) in the augmented features, and checks the
	    augmented features for exact duplicate rows.

	    Args:
	        data_dir: Directory containing the ``hear_embeddings`` and
	            ``hear_embeddings_augmented`` sub-directories.
	    """
	    dir_orig = os.path.join(data_dir, "hear_embeddings")
	    dir_aug = os.path.join(data_dir, "hear_embeddings_augmented")

	    x1 = np.load(os.path.join(dir_orig, "X_hear.npy"))
	    y1 = np.load(os.path.join(dir_orig, "y_hear.npy"))

	    x2 = np.load(os.path.join(dir_aug, "X_hear_aug.npy"))
	    # The augmented labels are not used by the checks below; they are
	    # still loaded so that a missing or unreadable label file is reported.
	    np.load(os.path.join(dir_aug, "y_hear_aug.npy"))

	    # Detailed check
	    print(f"Original: {x1.shape}")
	    print(f"Augmented: {x2.shape}")

	    # Check whether the first sick sample of the original set exists in x2.
	    sick_sample = first_sample_with_label(x1, y1, "sick")
	    if sick_sample is None:
	        print("No 'sick' sample in original labels; skipping overlap check.")
	    else:
	        try:
	            matches = rows_close_to(x2, sick_sample, atol=1e-5)
	        except ValueError as err:
	            print(f"Cannot compare original and augmented samples: {err}")
	        else:
	            if np.any(matches):
	                print("Found exact match of original sick sample in augmented data.")
	                print(f"Count of matches: {np.sum(matches)}")
	            else:
	                print("Original sick sample NOT found in augmented data (implies transformation or different subset).")

	    # Check if x2 contains duplicates within itself
	    if has_duplicate_rows(x2):
	        print("Augmented data contains exact duplicates!")
	    else:
	        print("Augmented data has unique samples.")


	if __name__ == "__main__":
	    import sys

	    main(sys.argv[1] if len(sys.argv) > 1 else base)