"""Classical image-feature extraction (color, texture, shape, HOG) and an
SVM training pipeline built on scikit-learn.

NOTE(review): this file contained an unresolved git merge conflict
(HEAD vs fc858b4a). The conflict is resolved here in favor of the HEAD
branch, which is the more developed side: correct OpenCV hue range for
HSV histograms, clipped Hu moments, quantized multi-offset GLCM, and a
leakage-free sklearn Pipeline (scaler/selector/PCA fitted on the
training fold only — this also covers the other branch's "FIX 1/FIX 2"
data-leakage fixes).
"""

import cv2
import numpy as np
from skimage.feature.texture import graycomatrix, graycoprops
from skimage.feature import local_binary_pattern, hog
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


def rgb_histogram(image, bins=32):
    """Color features: RGB histograms, HSV histograms, and color moments.

    Parameters
    ----------
    image : np.ndarray
        RGB image, shape (H, W, 3); assumed uint8-range values — TODO confirm.
    bins : int
        Number of histogram bins per channel.

    Returns
    -------
    np.ndarray
        1-D feature vector: 3*bins (RGB) + 3*bins (HSV) + 9 color moments.
    """
    features = []
    # Convert to float32 for numeric stability in the moment computations.
    image = image.astype(np.float32)

    # Per-channel RGB histograms, L2-normalized.
    for i in range(3):
        hist = cv2.calcHist([image], [i], None, [bins], [0, 256])
        hist = cv2.normalize(hist, hist).flatten()
        features.extend(hist)

    # HSV histograms — OpenCV stores hue in [0, 180), S and V in [0, 256).
    hsv = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2HSV)
    for i, (low, high) in enumerate(zip([0, 0, 0], [180, 256, 256])):
        hist = cv2.calcHist([hsv], [i], None, [bins], [low, high])
        hist = cv2.normalize(hist, hist).flatten()
        features.extend(hist)

    # Color moments per RGB channel: mean, std, and skewness
    # (cube root of the third central moment keeps the sign).
    for i in range(3):
        channel = image[:, :, i]
        mean = np.mean(channel)
        std = np.std(channel)
        skew = np.cbrt(np.mean((channel - mean) ** 3))
        features.extend([mean, std, skew])

    return np.array(features)


def hu_moments(image):
    """Return the 7 log-scaled Hu moment invariants of the grayscale image.

    The log transform compresses the moments' huge dynamic range; clipping
    to [-10, 10] reduces sensitivity to noise-driven extreme values.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    moments = cv2.moments(gray)
    hu = cv2.HuMoments(moments).flatten()
    # Signed log10; the 1e-10 epsilon guards against log(0).
    hu = -np.sign(hu) * np.log10(np.abs(hu) + 1e-10)
    # Clip extreme values to reduce sensitivity to noise.
    hu = np.clip(hu, -10, 10)
    return hu


def glcm_features(image, distances=[1, 2], angles=[0, np.pi / 4, np.pi / 2],
                  levels=64):
    """GLCM texture features over multiple distance/angle offsets.

    The grayscale image is quantized to `levels` gray levels (smaller GLCM,
    less sparsity), then for each (distance, angle) pair five Haralick
    properties are extracted.

    Returns
    -------
    np.ndarray
        len(distances) * len(angles) * 5 feature values.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gray = (gray // (256 // levels)).astype(np.uint8)  # quantization
    features = []
    for d in distances:
        for a in angles:
            glcm = graycomatrix(gray, distances=[d], angles=[a],
                                levels=levels, symmetric=True, normed=True)
            props = ['contrast', 'dissimilarity', 'homogeneity',
                     'energy', 'correlation']
            for p in props:
                val = graycoprops(glcm, p).flatten()
                features.extend(val)
    return np.array(features)


def local_binary_pattern_features(image, P=8, R=1):
    """Normalized histogram of uniform LBP codes (P+2 bins).

    Parameters
    ----------
    P : int
        Number of circularly sampled neighbors.
    R : int
        Radius of the sampling circle.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    lbp = local_binary_pattern(gray, P, R, method='uniform')
    # 'uniform' LBP yields P + 2 distinct codes; density=True normalizes.
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3),
                           range=(0, P + 2), density=True)
    return hist


def edge_density(image, low_threshold=50, high_threshold=150):
    """Fraction of pixels marked as edges by the Canny detector.

    Returns a 1-element array so it concatenates cleanly with the other
    feature vectors.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low_threshold, high_threshold)
    density = np.sum(edges > 0) / edges.size
    return np.array([density])


def hog_features(image, pixels_per_cell=(16, 16), cells_per_block=(2, 2),
                 orientations=9):
    """HOG descriptor of the image resized to 128x128 grayscale.

    `transform_sqrt` applies gamma compression before gradient computation,
    which improves robustness to illumination changes.
    """
    image_resized = cv2.resize(image, (128, 128))
    gray = cv2.cvtColor(image_resized, cv2.COLOR_RGB2GRAY)
    hog_feat = hog(gray,
                   orientations=orientations,
                   pixels_per_cell=pixels_per_cell,
                   cells_per_block=cells_per_block,
                   block_norm='L2-Hys',
                   transform_sqrt=True,
                   feature_vector=True)
    return hog_feat


def extract_features_from_image(image):
    """Concatenate all handcrafted features for one RGB image.

    Order: color histograms/moments, Hu moments, GLCM, LBP, edge density,
    HOG — a single 1-D vector suitable for the SVM pipeline below.
    """
    hist = rgb_histogram(image)
    hu = hu_moments(image)
    glcm = glcm_features(image)
    lbp = local_binary_pattern_features(image)
    edge = edge_density(image)
    hog_f = hog_features(image)
    return np.concatenate([hist, hu, glcm, lbp, edge, hog_f])


def perform_pca(data, num_components):
    """Standardize `data` and project it onto its top principal components.

    NOTE(review): fits the scaler and PCA on the full matrix — fine for
    exploratory reduction, but for model evaluation use `train_svm_model`,
    whose Pipeline fits these on the training fold only.

    Returns
    -------
    np.ndarray
        The reduced data, shape (n_samples, min(num_components, n_features)).
    """
    # Clean non-finite values that would break StandardScaler/PCA.
    data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)
    scaler = StandardScaler()
    data_standardized = scaler.fit_transform(data)
    # Cap the component count at the feature dimensionality.
    k = min(num_components, data.shape[1])
    pca = PCA(n_components=k)
    data_reduced = pca.fit_transform(data_standardized)
    print(f"PCA: Reduced from {data.shape[1]} to {k} components")
    print(f"Explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")
    return data_reduced


def train_svm_model(features, labels, test_size=0.2, random_state=42,
                    use_selectkbest=True, k_best=500,
                    n_pca_components=100, do_gridsearch=False):
    """Train a linear SVM inside a leakage-free sklearn Pipeline.

    The Pipeline (scaler -> optional SelectKBest -> PCA -> SVC) is fitted
    on the training split only, so scaling/selection/PCA statistics never
    see test data.

    Parameters
    ----------
    features : np.ndarray, shape (n_samples, n_features)
    labels : np.ndarray
        (n_samples,) class labels, or (n_samples, n_classes) one-hot.
    test_size : float
        Held-out fraction for evaluation.
    random_state : int
        Seed for split, CV, and SVC.
    use_selectkbest : bool
        Insert an ANOVA-F SelectKBest stage before PCA.
    k_best : int
        Number of features kept by SelectKBest (capped at n_features).
    n_pca_components : int
        PCA output dimensionality (capped at n_features).
    do_gridsearch : bool
        Run a stratified 5-fold GridSearchCV over k/PCA/C instead of a
        single fit.

    Returns
    -------
    pipeline : sklearn.pipeline.Pipeline
        The trained pipeline (best estimator if grid search was used).
    (X_test, y_test, y_pred) : tuple
        Held-out data and predictions for quick evaluation.
    grid_search : GridSearchCV or None
        The fitted search object when `do_gridsearch` is True.
    """
    # Convert one-hot labels to integer class indices if needed.
    if labels.ndim > 1 and labels.shape[1] > 1:
        labels = np.argmax(labels, axis=1)

    # Stratified split preserves class proportions in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size,
        random_state=random_state, stratify=labels)

    # Build pipeline steps; caps keep k/n_components valid for small inputs.
    steps = [('scaler', StandardScaler())]
    if use_selectkbest:
        steps.append(('select', SelectKBest(score_func=f_classif,
                                            k=min(k_best, X_train.shape[1]))))
    steps.append(('pca', PCA(n_components=min(n_pca_components,
                                              X_train.shape[1]))))
    steps.append(('svc', SVC(kernel='linear', probability=True,
                             class_weight='balanced',
                             random_state=random_state)))
    pipeline = Pipeline(steps)

    grid_search = None
    if do_gridsearch:
        param_grid = {
            'select__k': [int(min(200, X_train.shape[1])),
                          int(min(500, X_train.shape[1])),
                          int(min(1000, X_train.shape[1]))]
                         if use_selectkbest else [],
            'pca__n_components': [50, 100, 200],
            'svc__C': [0.1, 1, 5, 10],
        }
        # Drop empty parameter lists (e.g. select__k when SelectKBest is off).
        param_grid = {k: v for k, v in param_grid.items() if v}
        cv = StratifiedKFold(n_splits=5, shuffle=True,
                             random_state=random_state)
        grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1,
                                   scoring='accuracy', verbose=2)
        grid_search.fit(X_train, y_train)
        pipeline = grid_search.best_estimator_
    else:
        pipeline.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = pipeline.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

    return pipeline, (X_test, y_test, y_pred), grid_search