Merge branch 'main' of hf.co:sharvari0b26/NewModel

60d762b 4 months ago

12.2 kB

	import cv2
	import numpy as np
	from skimage.feature.texture import graycomatrix, graycoprops
	from skimage.feature import local_binary_pattern ,hog
	from sklearn.decomposition import PCA
	from sklearn.svm import SVC
	from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
	from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
	from sklearn.feature_selection import SelectKBest, f_classif
	from sklearn.preprocessing import StandardScaler
	from sklearn.pipeline import Pipeline


	def rgb_histogram(image, bins=32):
	    """Return colour-histogram and colour-moment features for one image.

	    The vector is laid out as: three RGB channel histograms, three HSV
	    channel histograms, then (mean, std, skew) for each RGB channel, giving
	    ``6 * bins + 9`` values in total.

	    Args:
	        image: 3-channel array indexed as ``image[:, :, channel]``.
	            Assumed to be RGB with values in 0-255 -- TODO confirm with caller.
	        bins: Number of bins per histogram.

	    Returns:
	        1-D ``numpy.ndarray`` of features.
	    """
	    features = []

	    # Work in float32 so the moment arithmetic below cannot wrap around.
	    image = image.astype(np.float32)

	    # RGB histograms
	    for i in range(3):
	        hist = cv2.calcHist([image], [i], None, [bins], [0, 256])
	        hist = cv2.normalize(hist, hist).flatten()
	        features.extend(hist)

	    # HSV histograms. The conversion runs on a uint8 copy, for which OpenCV
	    # stores hue in [0, 180); saturation and value use the full [0, 256).
	    hsv = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2HSV)
	    for i, upper in enumerate((180, 256, 256)):
	        hist = cv2.calcHist([hsv], [i], None, [bins], [0, upper])
	        hist = cv2.normalize(hist, hist).flatten()
	        features.extend(hist)

	    # Color moments (mean, std, skew). Skew here is the cube root of the
	    # third central moment, so it stays in the same units as the pixels.
	    for i in range(3):
	        channel = image[:, :, i]
	        mean = np.mean(channel)
	        std = np.std(channel)
	        skew = np.cbrt(np.mean((channel - mean) ** 3))
	        features.extend([mean, std, skew])

	    return np.array(features)


	def hu_moments(image):
	    """Return log-scaled Hu moment invariants of the grayscale image.

	    Args:
	        image: RGB image accepted by ``cv2.cvtColor(..., COLOR_RGB2GRAY)``.

	    Returns:
	        1-D array of the Hu moments after a signed log10 transform, clipped
	        to the range [-10, 10].
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	    moments = cv2.moments(gray)
	    hu = cv2.HuMoments(moments).flatten()
	    # Signed log transform compresses the very wide dynamic range; the
	    # small epsilon keeps log10 finite when a moment is exactly zero.
	    hu = -np.sign(hu) * np.log10(np.abs(hu) + 1e-10)
	    # Clip extreme values to reduce sensitivity to noise
	    hu = np.clip(hu, -10, 10)
	    return hu


	def glcm_features(image, distances=(1, 2), angles=(0, np.pi / 4, np.pi / 2),
	                  levels=64, symmetric=True, normed=True):
	    """Return gray-level co-occurrence matrix (GLCM) texture features.

	    For every (distance, angle) pair, in that nesting order, five properties
	    are appended: contrast, dissimilarity, homogeneity, energy, correlation.
	    The result therefore has ``len(distances) * len(angles) * 5`` values.

	    Args:
	        image: RGB image accepted by ``cv2.cvtColor(..., COLOR_RGB2GRAY)``.
	            Assumed to be 8-bit (gray values 0-255) -- TODO confirm.
	        distances: Pixel offsets to evaluate.
	        angles: Offsets' directions in radians.
	        levels: Number of gray levels after quantisation (at most 256).
	        symmetric: Passed through to ``graycomatrix``.
	        normed: Passed through to ``graycomatrix``.

	    Returns:
	        1-D ``numpy.ndarray`` of texture features.
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	    # Quantise 0-255 into `levels` buckets. Scaling before dividing keeps
	    # every value below `levels` even when `levels` does not divide 256
	    # (a plain `gray // (256 // levels)` would overflow the GLCM there);
	    # for power-of-two `levels` the result is identical to that form.
	    gray = (gray.astype(np.uint32) * levels // 256).astype(np.uint8)

	    props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
	    features = []
	    for d in distances:
	        for a in angles:
	            glcm = graycomatrix(gray, distances=[d], angles=[a], levels=levels,
	                                symmetric=symmetric, normed=normed)
	            for p in props:
	                features.extend(graycoprops(glcm, p).flatten())

	    return np.array(features)


	def local_binary_pattern_features(image, P=8, R=1):
	    """Return the normalised histogram of uniform local binary patterns.

	    Args:
	        image: RGB image accepted by ``cv2.cvtColor(..., COLOR_RGB2GRAY)``.
	        P: Number of circularly symmetric neighbour points.
	        R: Radius of the neighbourhood circle.

	    Returns:
	        1-D array with ``P + 2`` density values, one per uniform LBP code.
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	    lbp = local_binary_pattern(gray, P, R, method='uniform')
	    # Bin edges 0..P+2 give one bin per code produced by the 'uniform' method.
	    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3), range=(0, P + 2), density=True)
	    return hist


	# Edge Density (Canny-based)
	def edge_density(image, low_threshold=50, high_threshold=150):
	    """Return the fraction of pixels marked as edges by the Canny detector.

	    Args:
	        image: RGB image accepted by ``cv2.cvtColor(..., COLOR_RGB2GRAY)``.
	        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
	        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

	    Returns:
	        One-element array holding the edge-pixel ratio in [0, 1].
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	    edges = cv2.Canny(gray, low_threshold, high_threshold)
	    density = np.sum(edges > 0) / edges.size
	    return np.array([density])


	def hog_features(image, pixels_per_cell=(16, 16), cells_per_block=(2, 2), orientations=9):
	    """Return a HOG (histogram of oriented gradients) descriptor.

	    The image is resized to 128x128 and converted to grayscale first, so
	    the descriptor length depends only on the HOG parameters, not on the
	    input resolution.

	    Args:
	        image: RGB image accepted by ``cv2.resize`` and
	            ``cv2.cvtColor(..., COLOR_RGB2GRAY)``.
	        pixels_per_cell: Cell size in pixels, as (rows, cols).
	        cells_per_block: Number of cells per normalisation block.
	        orientations: Number of gradient orientation bins.

	    Returns:
	        1-D HOG feature vector.
	    """
	    image_resized = cv2.resize(image, (128, 128))
	    gray = cv2.cvtColor(image_resized, cv2.COLOR_RGB2GRAY)
	    hog_feat = hog(gray,
	                   orientations=orientations,
	                   pixels_per_cell=pixels_per_cell,
	                   cells_per_block=cells_per_block,
	                   block_norm='L2-Hys',
	                   # Square-root compression of pixel values before the
	                   # gradients are computed.
	                   transform_sqrt=True,
	                   feature_vector=True)
	    return hog_feat


	def extract_features_from_image(image):
	    """Build the full feature vector for a single image.

	    Concatenates, in this fixed order: colour histogram/moment features,
	    Hu moments, GLCM texture features, LBP histogram, edge density and HOG.
	    The order matters because downstream models are fitted on it.

	    Args:
	        image: RGB image passed unchanged to every extractor.

	    Returns:
	        1-D ``numpy.ndarray`` with all features concatenated.
	    """
	    hist = rgb_histogram(image)
	    hu = hu_moments(image)
	    glcm = glcm_features(image)
	    lbp = local_binary_pattern_features(image)
	    edge = edge_density(image)
	    hog_f = hog_features(image)

	    return np.concatenate([hist, hu, glcm, lbp, edge, hog_f])


	def perform_pca(data, num_components):
	    """Standardise a feature matrix and project it onto principal components.

	    Non-finite entries (NaN, +inf, -inf) are replaced by 0 before scaling.
	    The number of components actually used is printed together with the
	    total explained-variance ratio.

	    Args:
	        data: 2-D feature matrix, one row per sample.
	        num_components: Requested number of principal components. It is
	            reduced when it exceeds the number of features or samples,
	            since PCA cannot produce more components than either.

	    Returns:
	        2-D array of shape ``(n_samples, k)`` with the reduced features.
	    """
	    # Clean data
	    data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)

	    # Standardize
	    scaler = StandardScaler()
	    data_standardized = scaler.fit_transform(data)

	    # Apply PCA. Cap by both dimensions: asking for more components than
	    # there are samples makes PCA raise instead of returning a result.
	    n_samples, n_features = data.shape
	    k = min(num_components, n_features, n_samples)
	    pca = PCA(n_components=k)
	    data_reduced = pca.fit_transform(data_standardized)

	    print(f"PCA: Reduced from {data.shape[1]} to {k} components")
	    print(f"Explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")

	    return data_reduced

	def train_svm_model(features, labels,
	                    test_size=0.2,
	                    random_state=42,
	                    use_selectkbest=True,
	                    k_best=500,
	                    n_pca_components=100,
	                    do_gridsearch=False):
	    """Train and evaluate a linear SVM inside a single sklearn Pipeline.

	    The pipeline is: StandardScaler -> optional SelectKBest -> PCA -> SVC.
	    Because every step lives in the pipeline, scaling, selection and PCA are
	    fitted on training data only. Test accuracy, a classification report
	    and the confusion matrix are printed.

	    Args:
	        features: 2-D feature matrix, one row per sample.
	        labels: Class labels, either 1-D or one-hot encoded (2-D); one-hot
	            labels are converted with ``argmax``.
	        test_size: Fraction of samples held out for testing.
	        random_state: Seed for the split, the CV shuffling and the SVC.
	        use_selectkbest: Whether to add the SelectKBest (ANOVA F) step.
	        k_best: Number of features SelectKBest keeps (capped by the number
	            of available features).
	        n_pca_components: Requested PCA components (capped by the number of
	            columns reaching PCA and by the number of training samples).
	        do_gridsearch: If true, tune the pipeline with 5-fold stratified
	            grid search and return the best estimator.

	    Returns:
	        Tuple ``(pipeline, (X_test, y_test, y_pred), grid_search)`` where
	        ``grid_search`` is the fitted GridSearchCV or ``None``.
	    """
	    labels = np.asarray(labels)
	    if labels.ndim > 1 and labels.shape[1] > 1:
	        labels = np.argmax(labels, axis=1)

	    # stratified split
	    X_train, X_test, y_train, y_test = train_test_split(
	        features, labels, test_size=test_size, random_state=random_state, stratify=labels)

	    n_train, n_features = X_train.shape
	    # Number of columns that actually reach the PCA step.
	    n_selected = min(k_best, n_features) if use_selectkbest else n_features

	    # build pipeline steps
	    steps = [('scaler', StandardScaler())]
	    if use_selectkbest:
	        steps.append(('select', SelectKBest(score_func=f_classif, k=n_selected)))
	    # PCA cannot keep more components than it has input columns or rows.
	    steps.append(('pca', PCA(n_components=min(n_pca_components, n_selected, n_train))))
	    steps.append(('svc', SVC(kernel='linear', probability=True, class_weight='balanced', random_state=random_state)))
	    pipeline = Pipeline(steps)

	    grid_search = None
	    if do_gridsearch:
	        n_splits = 5
	        # Approximate size of the smallest training fold during CV.
	        fold_train = (n_train * (n_splits - 1)) // n_splits
	        pca_cap = min(n_features, fold_train)

	        param_grid = {}
	        if use_selectkbest:
	            # Cap by the feature count and drop duplicates the cap creates.
	            select_ks = sorted({min(c, n_features) for c in (200, 500, 1000)})
	            param_grid['select__k'] = select_ks
	            # Keep PCA candidates valid for the smallest selection size.
	            pca_cap = min(pca_cap, select_ks[0])
	        param_grid['pca__n_components'] = sorted({min(c, pca_cap) for c in (50, 100, 200)})
	        param_grid['svc__C'] = [0.1, 1, 5, 10]

	        cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
	        grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1, scoring='accuracy', verbose=2)
	        grid_search.fit(X_train, y_train)
	        pipeline = grid_search.best_estimator_
	    else:
	        pipeline.fit(X_train, y_train)

	    # Evaluate
	    y_pred = pipeline.predict(X_test)
	    acc = accuracy_score(y_test, y_pred)
	    print(f"Test Accuracy: {acc:.4f}")
	    print(classification_report(y_test, y_pred))
	    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

	    return pipeline, (X_test, y_test, y_pred), grid_search