import os
from glob import glob

import cv2
import numpy as np
from PIL import Image, UnidentifiedImageError
from skimage.feature import graycomatrix, graycoprops


class GLCMFeatureExtractor:
    """Extract grey-level co-occurrence matrix (GLCM) texture features.

    Images are loaded as greyscale, resized to 256x256, contrast-stretched
    to the full 0-255 range and then summarised with six GLCM properties
    computed for every (distance, angle) combination.
    """

    # Immutable defaults; each instance receives its own list copy.
    _DEFAULT_DISTANCES = (1, 3, 5)
    _DEFAULT_ANGLES = (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)

    # GLCM properties, in the order they appear in the feature vector.
    _TEXTURE_PROPS = (
        'contrast',
        'dissimilarity',
        'homogeneity',
        'energy',
        'correlation',
        'ASM',
    )

    # Glob patterns searched by extract_from_folder.
    _IMAGE_PATTERNS = ('*.png', '*.jpg', '*.jpeg', '*.dcm', '*.tif', '*.bmp')

    # Side length (pixels) every image is resized to before analysis.
    _TARGET_SIZE = (256, 256)

    def __init__(self, distances=None, angles=None):
        """Store the pixel-pair offsets used to build the GLCM.

        Args:
            distances: Pixel distances passed to ``graycomatrix``.
                Defaults to ``[1, 3, 5]``.
            angles: Angles in radians passed to ``graycomatrix``.
                Defaults to ``[0, pi/4, pi/2, 3*pi/4]``.
        """
        # ``None`` sentinels avoid sharing one mutable default list between
        # every instance of the class.
        self.distances = (
            list(self._DEFAULT_DISTANCES) if distances is None else distances
        )
        self.angles = list(self._DEFAULT_ANGLES) if angles is None else angles

    def preprocess_xray(self, img_path):
        """Load an image as a normalised 256x256 ``uint8`` greyscale array.

        OpenCV is tried first; if it cannot decode the file, PIL is used as
        a fallback. The image is then resized and min-max stretched to
        0-255. A zero-contrast image becomes an all-black image.

        Args:
            img_path: Path of the image file to load.

        Returns:
            A ``uint8`` array of shape (256, 256), or ``None`` if the image
            could not be loaded or processed (the error is printed).
        """
        try:
            # First try with OpenCV; it returns None instead of raising.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            if img is None:
                # Fallback to PIL for files OpenCV cannot decode.
                try:
                    with Image.open(img_path) as pil_img:
                        img = np.array(pil_img.convert('L'))
                except (IOError, UnidentifiedImageError) as e:
                    raise ValueError(
                        f"PIL cannot read image: {img_path}"
                    ) from e

            if img.size == 0:
                raise ValueError(f"Empty image: {img_path}")

            img = cv2.resize(img, self._TARGET_SIZE)

            # Min-max normalisation in float to avoid uint8 wrap-around.
            img = img.astype(np.float32)
            min_val = np.min(img)
            max_val = np.max(img)

            if max_val - min_val < 1e-5:
                # Zero-contrast image: avoid dividing by ~0, return black.
                img = np.zeros_like(img)
            else:
                img = (img - min_val) / (max_val - min_val) * 255

            return img.astype(np.uint8)

        except Exception as e:
            # Deliberate best-effort boundary: a single unreadable file must
            # not abort a batch run, so report it and signal with None.
            print(f"Error processing {img_path}: {str(e)}")
            return None

    def extract_features(self, img):
        """Compute the GLCM texture feature vector for one image.

        Args:
            img: 2-D ``uint8`` image as returned by ``preprocess_xray``,
                or ``None``.

        Returns:
            A 1-D array holding, for each property in turn (contrast,
            dissimilarity, homogeneity, energy, correlation, ASM), the
            flattened ``graycoprops`` result for every distance/angle
            pair; or ``None`` if ``img`` is ``None`` or extraction fails
            (the error is printed).
        """
        if img is None:
            return None

        try:
            glcm = graycomatrix(
                img,
                distances=self.distances,
                angles=self.angles,
                levels=256,
                symmetric=True,
                normed=True,
            )

            features = []
            for prop in self._TEXTURE_PROPS:
                features.extend(graycoprops(glcm, prop).flatten())

            return np.array(features)

        except Exception as e:
            # Best-effort: report and let the caller skip this image.
            print(f"Feature extraction error: {str(e)}")
            return None

    def extract_from_folder(self, folder_path, max_samples=None):
        """Extract features for every supported image directly in a folder.

        The folder's own name is used as the class label of each image.
        Images that fail to load or to yield features are skipped.

        Args:
            folder_path: Directory to scan (not recursive).
            max_samples: If truthy and smaller than the number of images
                found, that many images are drawn at random without
                replacement using ``np.random.choice``. ``None`` or ``0``
                processes every image.

        Returns:
            A ``(features, labels)`` tuple of arrays with one entry per
            successfully processed image. Both arrays are empty when no
            image files are found.
        """
        features = []
        labels = []

        # normpath drops any trailing separator; without it basename() of
        # "data/normal/" is "" and every label would be an empty string.
        class_name = os.path.basename(os.path.normpath(folder_path))

        image_paths = []
        for pattern in self._IMAGE_PATTERNS:
            image_paths.extend(glob(os.path.join(folder_path, pattern)))

        if not image_paths:
            print(f"Warning: No images found in {folder_path}")
            return np.array([]), np.array([])

        # glob order is filesystem-dependent; sorting makes the output order
        # (and seeded sampling below) reproducible.
        image_paths.sort()

        if max_samples and len(image_paths) > max_samples:
            # tolist() turns the sampled numpy strings back into plain str.
            image_paths = np.random.choice(
                image_paths, max_samples, replace=False
            ).tolist()

        for img_path in image_paths:
            img = self.preprocess_xray(img_path)
            if img is None:
                continue

            feat = self.extract_features(img)
            if feat is not None:
                features.append(feat)
                labels.append(class_name)

        print(
            f"Successfully processed {len(features)}/{len(image_paths)} "
            f"images in {folder_path}"
        )
        return np.array(features), np.array(labels)