| import cv2 |
| import numpy as np |
| from skimage.feature.texture import graycomatrix, graycoprops |
| from skimage.feature import local_binary_pattern ,hog |
| from sklearn.decomposition import PCA |
| from sklearn.svm import SVC |
| from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold |
| from sklearn.metrics import accuracy_score, confusion_matrix, classification_report |
| from sklearn.feature_selection import SelectKBest, f_classif |
| from sklearn.preprocessing import StandardScaler |
| from sklearn.pipeline import Pipeline |
|
|
|
|
def rgb_histogram(image, bins=32):
    """Build a colour descriptor from channel histograms and colour moments.

    The descriptor is laid out as:

    1. one L2-normalized ``bins``-bin histogram per channel of ``image``
       over the value range [0, 256);
    2. one L2-normalized ``bins``-bin histogram per HSV channel (hue over
       [0, 180), saturation and value over [0, 256));
    3. for each channel of ``image``: mean, standard deviation, and the
       cube root of the third central moment.

    Args:
        image: 3-channel image array. It is interpreted as RGB because the
            HSV conversion uses ``cv2.COLOR_RGB2HSV``; values are cast to
            ``uint8`` for that conversion (assumes a 0..255 range).
        bins: Number of bins for every histogram.

    Returns:
        1-D array with ``6 * bins + 9`` entries.
    """
    # calcHist does not accept every dtype, so work on a float32 copy.
    pixels = image.astype(np.float32)

    def _unit_histogram(source, channel, upper):
        # Histogram of one channel, scaled with cv2.normalize defaults.
        counts = cv2.calcHist([source], [channel], None, [bins], [0, upper])
        return cv2.normalize(counts, counts).flatten()

    descriptor = []

    for channel in range(3):
        descriptor.extend(_unit_histogram(pixels, channel, 256))

    # OpenCV stores 8-bit hue in [0, 180), hence the narrower first range.
    hsv_pixels = cv2.cvtColor(pixels.astype(np.uint8), cv2.COLOR_RGB2HSV)
    for channel, upper in enumerate((180, 256, 256)):
        descriptor.extend(_unit_histogram(hsv_pixels, channel, upper))

    for channel in range(3):
        plane = pixels[:, :, channel]
        mu = np.mean(plane)
        sigma = np.std(plane)
        # Cube root keeps the third moment in the same units as the pixels.
        third = np.cbrt(np.mean((plane - mu) ** 3))
        descriptor.extend([mu, sigma, third])

    return np.array(descriptor)
|
|
|
|
def hu_moments(image):
    """Return log-scaled Hu moment invariants of the grayscale image.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``.

    Returns:
        1-D array of the Hu moments after a signed log10 transform,
        clipped to the interval [-10, 10].
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    raw = cv2.HuMoments(cv2.moments(grayscale)).flatten()

    # Hu moments span many orders of magnitude; compress them with log10
    # while keeping the sign. The small offset avoids log10(0).
    log_magnitude = np.log10(np.abs(raw) + 1e-10)
    signed_log = -np.sign(raw) * log_magnitude

    return np.clip(signed_log, -10, 10)
|
|
|
|
def glcm_features(image, distances=(1, 2), angles=(0, np.pi/4, np.pi/2), levels=64):
    """Compute gray-level co-occurrence matrix (GLCM) texture statistics.

    The image is converted to grayscale and quantized to ``levels`` gray
    levels. For every (distance, angle) pair a symmetric, normalized GLCM is
    built and five properties are read from it: contrast, dissimilarity,
    homogeneity, energy and correlation.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``. Intensities are assumed to lie in the
            8-bit range 0..255.
        distances: Pixel-pair offsets passed to ``graycomatrix``.
        angles: Pixel-pair directions passed to ``graycomatrix``.
        levels: Number of gray levels after quantization (1..256).

    Returns:
        1-D array with ``len(distances) * len(angles) * 5`` values, ordered
        by distance, then angle, then property.

    Raises:
        ValueError: If ``levels`` is outside 1..256.
    """
    if not 1 <= levels <= 256:
        raise ValueError(f"levels must be in the range 1..256, got {levels}")

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Map 0..255 proportionally onto 0..levels-1. Plain integer division by
    # (256 // levels) only stays below `levels` when levels divides 256;
    # scaling first keeps the maximum at levels - 1 for any level count and
    # gives the same result as before when levels does divide 256.
    gray = (gray.astype(np.uint32) * levels // 256).astype(np.uint8)

    props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
    features = []

    for d in distances:
        for a in angles:
            glcm = graycomatrix(gray, distances=[d], angles=[a], levels=levels,
                                symmetric=True, normed=True)
            for p in props:
                features.extend(graycoprops(glcm, p).flatten())

    return np.array(features)
|
|
|
|
def local_binary_pattern_features(image, P=8, R=1):
    """Return the density histogram of uniform local binary pattern codes.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``.
        P: Number of neighbour points passed to ``local_binary_pattern``.
        R: Neighbourhood radius passed to ``local_binary_pattern``.

    Returns:
        1-D array with ``P + 2`` density values, one per unit-width bin
        between 0 and ``P + 2``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    codes = local_binary_pattern(grayscale, P, R, method='uniform')

    # Integer bin edges 0, 1, ..., P + 2 give one bin per code value.
    bin_edges = np.arange(0, P + 3)
    densities = np.histogram(
        codes.ravel(),
        bins=bin_edges,
        range=(0, P + 2),
        density=True,
    )[0]
    return densities
|
|
|
|
| |
def edge_density(image, low_threshold=50, high_threshold=150):
    """Return the fraction of pixels marked as edges by the Canny detector.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        Array of shape ``(1,)`` holding the edge-pixel ratio.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edge_map = cv2.Canny(grayscale, low_threshold, high_threshold)

    # Every non-zero entry of the edge map is an edge pixel.
    edge_pixels = np.count_nonzero(edge_map)
    ratio = edge_pixels / edge_map.size
    return np.array([ratio])
|
|
|
|
def hog_features(image, pixels_per_cell=(16,16), cells_per_block=(2,2), orientations=9):
    """Return a HOG descriptor of the image at a fixed 128x128 size.

    The image is resized before the grayscale conversion, so the descriptor
    length depends only on the HOG parameters, not on the input size.

    Args:
        image: Image array accepted by ``cv2.resize`` and by
            ``cv2.cvtColor`` with ``cv2.COLOR_RGB2GRAY``.
        pixels_per_cell: Cell size forwarded to ``skimage.feature.hog``.
        cells_per_block: Block size forwarded to ``skimage.feature.hog``.
        orientations: Number of orientation bins forwarded to ``hog``.

    Returns:
        The flattened feature vector returned by ``hog``.
    """
    hog_options = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
    )
    fixed_size = cv2.resize(image, (128, 128))
    grayscale = cv2.cvtColor(fixed_size, cv2.COLOR_RGB2GRAY)
    return hog(grayscale, **hog_options)
|
|
|
|
def extract_features_from_image(image):
    """Run every feature extractor on the image and join the results.

    Args:
        image: Image array passed unchanged to each extractor.

    Returns:
        1-D array formed by concatenating, in this order: colour histogram
        features, Hu moments, GLCM features, LBP histogram, edge density and
        HOG features.
    """
    # The tuple order fixes the layout of the combined vector.
    extractors = (
        rgb_histogram,
        hu_moments,
        glcm_features,
        local_binary_pattern_features,
        edge_density,
        hog_features,
    )
    return np.concatenate([extract(image) for extract in extractors])
|
|
def perform_pca(data, num_components):
    """Standardize the data and project it onto its principal components.

    NaN and infinite entries are replaced with 0.0 before scaling. The scaler
    and the PCA model are fitted on ``data`` itself and are not returned, so
    the same projection cannot be re-applied to other samples afterwards.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        num_components: Requested number of components. It is reduced when
            it exceeds what the data can support.

    Returns:
        Array of shape (n_samples, k) with the projected data, where ``k``
        is the number of components actually used.
    """
    data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)

    data_standardized = StandardScaler().fit_transform(data)

    # PCA can produce at most min(n_samples, n_features) components, so cap
    # by both dimensions; capping by the feature count alone still fails
    # when there are fewer samples than requested components.
    k = min(num_components, *data.shape)
    pca = PCA(n_components=k)
    data_reduced = pca.fit_transform(data_standardized)

    print(f"PCA: Reduced from {data.shape[1]} to {k} components")
    print(f"Explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")

    return data_reduced
|
|
def train_svm_model(features, labels,
                    test_size=0.2,
                    random_state=42,
                    use_selectkbest=True,
                    k_best=500,
                    n_pca_components=100,
                    do_gridsearch=False):
    """Train and evaluate a scaler -> (SelectKBest) -> PCA -> linear SVC pipeline.

    The data is split into stratified train/test parts, the pipeline is
    fitted on the training part (optionally via a 5-fold grid search), and
    accuracy, a classification report and the confusion matrix for the test
    part are printed.

    Args:
        features: 2-D feature matrix of shape (n_samples, n_features).
        labels: Class labels. A 2-D array with more than one column is
            reduced to class indices with ``argmax`` along axis 1.
        test_size: Fraction (or count) of samples held out for testing.
        random_state: Seed used for the split, the CV folds and the SVC.
        use_selectkbest: Whether to include the ``SelectKBest`` step.
        k_best: Number of features kept by ``SelectKBest`` (capped by the
            number of available features).
        n_pca_components: Requested PCA dimensionality (capped by the number
            of features reaching PCA and by the number of training samples).
        do_gridsearch: If True, tune ``select__k``, ``pca__n_components``
            and ``svc__C`` with ``GridSearchCV``.

    Returns:
        A 3-tuple ``(pipeline, (X_test, y_test, y_pred), grid_search)``:
        the fitted pipeline (the best estimator when grid search is used),
        the held-out data with its predictions, and the fitted
        ``GridSearchCV`` object or ``None`` when no search was run.
    """
    # np.asarray lets plain sequences be inspected for a one-hot layout;
    # 1-D labels are passed on untouched.
    label_array = np.asarray(labels)
    if label_array.ndim > 1 and label_array.shape[1] > 1:
        labels = np.argmax(label_array, axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state, stratify=labels)

    n_train, n_features = np.shape(X_train)[:2]

    # Number of features that actually reach the PCA step.
    n_selected = min(k_best, n_features) if use_selectkbest else n_features

    steps = [('scaler', StandardScaler())]
    if use_selectkbest:
        steps.append(('select', SelectKBest(score_func=f_classif, k=n_selected)))
    # PCA cannot return more components than min(n_samples, n_features) of
    # its own input, i.e. of the selected features and the training rows.
    steps.append(('pca', PCA(n_components=min(n_pca_components, n_selected, n_train))))
    steps.append(('svc', SVC(kernel='linear', probability=True,
                             class_weight='balanced', random_state=random_state)))
    pipeline = Pipeline(steps)

    grid_search = None
    if do_gridsearch:
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
        # During cross-validation PCA only sees the training part of a fold.
        smallest_fold = min(len(train_idx) for train_idx, _ in cv.split(X_train, y_train))

        param_grid = {'svc__C': [0.1, 1, 5, 10]}
        feature_cap = n_features
        if use_selectkbest:
            # A set removes the duplicates that appear once the candidates
            # are capped by a small feature count.
            k_candidates = sorted({min(k, n_features) for k in (200, 500, 1000)})
            param_grid['select__k'] = k_candidates
            # Every PCA candidate must be valid for the smallest k tried.
            feature_cap = k_candidates[0]
        pca_cap = min(feature_cap, smallest_fold)
        param_grid['pca__n_components'] = sorted({min(c, pca_cap) for c in (50, 100, 200)})

        grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1,
                                   scoring='accuracy', verbose=2)
        grid_search.fit(X_train, y_train)
        pipeline = grid_search.best_estimator_
    else:
        pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

    return pipeline, (X_test, y_test, y_pred), grid_search
|
|
|
|