Spaces:

mansimodi
/

GenerateMosaic

Sleeping

GenerateMosaic / preprocess.py

mansi.modi@streebo.com

Add preprocess files

0404f22 11 months ago

3.91 kB

	# preprocess.py (this header previously read "build_kdtree.py"): builds the KD-tree index of dataset tiles

	import os
	import cv2
	import numpy as np
	import pickle
	import math
	from sklearn.neighbors import KDTree

	# ----------------- Constants -----------------
	# These are read by build_kdtree() below.
	DATASET_FOLDER = "Dataset" # Folder listed (non-recursively) for .png/.jpg/.jpeg dataset images
	KD_TILE_SIZE = (50, 50) # Size passed to cv2.resize for every dataset image before feature extraction
	KD_TREE_PATH = "kdtree_dataset.pkl" # Output pickle file holding the tree, the resized images and the features

	# ----------------- Feature Extraction -----------------
	def compute_features(image):
	    """Compute colour, edge, texture and gradient statistics for one image.

	    Args:
	        image: Image array in RGB channel order (the conversions below use
	            the ``COLOR_RGB2*`` codes). Presumably an 8-bit, 3-channel
	            image -- the ``/ 255.0`` scaling assumes that value range;
	            confirm against callers.

	    Returns:
	        A tuple ``(avg_lab, avg_edge, avg_texture, avg_grad)``:

	        - ``avg_lab``: per-channel mean of the Gaussian-blurred image after
	          conversion to Lab (array of 3 values, in OpenCV's Lab encoding).
	        - ``avg_edge``: mean of the Canny edge map divided by 255.
	        - ``avg_texture``: standard deviation of the grayscale image
	          divided by 255.
	        - ``avg_grad``: mean Sobel gradient magnitude divided by 255. This
	          is a rescaling, not a clamp; presumably it can exceed 1.0 on very
	          high-contrast images.
	    """
	    # Blur before averaging so the mean colour is less sensitive to noise.
	    blurred = cv2.GaussianBlur(image, (5, 5), 0)
	    img_lab = cv2.cvtColor(blurred, cv2.COLOR_RGB2LAB)
	    avg_lab = np.mean(img_lab, axis=(0, 1))

	    # Edge, texture and gradient measures all work on the unblurred
	    # grayscale image.
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

	    # Edge density: mean of the Canny edge map, scaled by 255.
	    edges = cv2.Canny(gray, 100, 200)
	    avg_edge = np.mean(edges) / 255.0

	    # Texture: spread of grayscale intensities, scaled by 255.
	    avg_texture = np.std(gray) / 255.0

	    # Gradient magnitude from horizontal and vertical Sobel derivatives.
	    # CV_64F keeps negative derivative values instead of clipping them.
	    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
	    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
	    # The squares are required here: the previous text referenced the
	    # undefined names grad_x2 / grad_y2 and raised NameError.
	    grad_mag = np.sqrt(grad_x ** 2 + grad_y ** 2)
	    avg_grad = np.mean(grad_mag) / 255.0

	    return avg_lab, avg_edge, avg_texture, avg_grad

	def build_kdtree():
	    """Build a KD-tree over the dataset images and pickle it to KD_TREE_PATH.

	    Every ``.png``/``.jpg``/``.jpeg`` file directly inside DATASET_FOLDER is
	    loaded, resized to KD_TILE_SIZE, converted from BGR to RGB and reduced to
	    a 6-dimensional feature vector by ``compute_features`` (3 Lab means plus
	    edge, texture and gradient scores). Each component is multiplied by the
	    square root of its weight (1.0 for the Lab channels, 0.5 for the other
	    three) so that a squared Euclidean distance between two vectors equals
	    the weighted sum of squared feature differences.

	    The pickle written to KD_TREE_PATH is a dict with keys ``'tree'`` (the
	    KDTree), ``'images'`` (the resized RGB images, in the same order as the
	    tree's rows) and ``'features'`` (the weighted feature matrix).

	    Returns:
	        None. If no image could be loaded, a message is printed and nothing
	        is written.

	    Raises:
	        OSError: propagated from ``os.listdir`` when DATASET_FOLDER cannot
	            be listed, or from ``open`` when KD_TREE_PATH cannot be written.
	    """
	    # sqrt(weight) per feature: 1.0 for the three Lab channels, sqrt(0.5)
	    # for edge, texture and gradient.
	    aux_scale = math.sqrt(0.5)
	    scale = np.array([1.0, 1.0, 1.0, aux_scale, aux_scale, aux_scale])

	    feature_list = []
	    images_list = []

	    # os.listdir returns entries in arbitrary order; sort them so the row
	    # order of the tree (and therefore the pickle) is reproducible.
	    image_paths = [
	        os.path.join(DATASET_FOLDER, img)
	        for img in sorted(os.listdir(DATASET_FOLDER))
	        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
	    ]

	    for img_path in image_paths:
	        img = cv2.imread(img_path)
	        if img is None:
	            # cv2.imread signals an unreadable file by returning None;
	            # report it rather than dropping it silently.
	            print(f"Skipping unreadable image: {img_path}")
	            continue
	        # Resize image to KD_TILE_SIZE and convert BGR -> RGB
	        img = cv2.resize(img, KD_TILE_SIZE)
	        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

	        avg_lab, avg_edge, avg_texture, avg_grad = compute_features(img)
	        # 6-dimensional vector: [L, a, b, edge, texture, gradient]
	        raw_feature = np.concatenate([avg_lab, [avg_edge, avg_texture, avg_grad]])
	        feature_list.append(raw_feature * scale)
	        # Keep the image at the same index as its feature row.
	        images_list.append(img)

	    if not feature_list:
	        print("No images found in dataset folder!")
	        return

	    features = np.array(feature_list)
	    # Build the KDTree using the weighted features
	    tree = KDTree(features)

	    tree_data = {
	        'tree': tree,
	        'images': images_list,
	        'features': features,  # optional: may be used for debugging
	    }

	    # Save the KDTree and dataset images to a pickle file
	    with open(KD_TREE_PATH, "wb") as f:
	        pickle.dump(tree_data, f)

	    print(f"KDTree built and saved to {KD_TREE_PATH}. Total images: {len(images_list)}")

	# Build the index only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    build_kdtree()