# File size: 4,107 Bytes
#
# Revision: 62bd7f8

import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
import os
from glob import glob
from PIL import Image, UnidentifiedImageError

class GLCMFeatureExtractor:
    """Extract grey-level co-occurrence matrix (GLCM) texture features.

    The pipeline is: load an image as 8-bit greyscale, resize it to
    ``TARGET_SIZE``, min-max stretch it to 0..255, build a symmetric,
    normalised GLCM for every (distance, angle) pair and read the
    properties listed in ``PROPERTIES`` from it.

    With the default settings each image yields
    ``len(PROPERTIES) * len(distances) * len(angles)`` = 6 * 3 * 4 = 72 values.

    Failures while loading or extracting are reported with ``print`` and
    signalled by returning ``None``; they never propagate to the caller.
    """

    # Defaults are immutable tuples; every instance receives its own list
    # copy so that mutating one extractor's settings cannot affect another.
    DEFAULT_DISTANCES = (1, 3, 5)
    DEFAULT_ANGLES = (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)

    # (width, height) passed to cv2.resize.
    TARGET_SIZE = (256, 256)
    # Number of grey levels counted by the GLCM; matches the uint8 range.
    GRAY_LEVELS = 256
    # Order matters: it fixes the layout of the returned feature vector.
    PROPERTIES = ('contrast', 'dissimilarity', 'homogeneity',
                  'energy', 'correlation', 'ASM')
    # Lower-case suffixes accepted by extract_from_folder.
    # NOTE(review): '.dcm' is listed, but neither decoder used in
    # preprocess_xray is known to read DICOM by default - such files will
    # probably be reported as errors and skipped; confirm.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.dcm', '.tif', '.bmp')

    def __init__(self, distances=None, angles=None):
        """Store the GLCM pixel offsets.

        Args:
            distances: Pixel pair distances. ``None`` selects ``[1, 3, 5]``.
            angles: Pixel pair angles in radians. ``None`` selects
                ``[0, pi/4, pi/2, 3*pi/4]``.
        """
        # A caller-supplied sequence is stored as given; only the defaults
        # are copied, which is what removes the shared-mutable-default trap.
        self.distances = (
            list(self.DEFAULT_DISTANCES) if distances is None else distances
        )
        self.angles = list(self.DEFAULT_ANGLES) if angles is None else angles

    def preprocess_xray(self, img_path):
        """Load ``img_path`` as a contrast-stretched 8-bit greyscale image.

        OpenCV is tried first; if it cannot decode the file, PIL is used as
        a fallback.

        Args:
            img_path: Path of the image file.

        Returns:
            A ``uint8`` array resized to ``TARGET_SIZE``, or ``None`` if the
            image could not be read or processed (the reason is printed).
            An image with no contrast comes back as all zeros.
        """
        try:
            # First try with OpenCV; it returns None instead of raising.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Fallback to PIL for files OpenCV could not decode.
                try:
                    with Image.open(img_path) as pil_img:
                        img = np.array(pil_img.convert('L'))
                except (IOError, UnidentifiedImageError) as e:
                    raise ValueError(f"PIL cannot read image: {img_path}") from e

            if img.size == 0:
                raise ValueError(f"Empty image: {img_path}")

            img = cv2.resize(img, self.TARGET_SIZE).astype(np.float32)
            min_val = np.min(img)
            max_val = np.max(img)
            span = max_val - min_val

            # A flat image would divide by (almost) zero; return black.
            if span < 1e-5:
                return np.zeros_like(img).astype(np.uint8)

            stretched = (img - min_val) / span * 255
            return stretched.astype(np.uint8)
        except Exception as e:
            # Deliberate best-effort: one bad file must not stop a batch.
            print(f"Error processing {img_path}: {str(e)}")
            return None

    def extract_features(self, img):
        """Compute the GLCM feature vector of a preprocessed image.

        Args:
            img: Greyscale ``uint8`` image as produced by
                ``preprocess_xray``, or ``None``.

        Returns:
            A 1-D array holding, for each property in ``PROPERTIES`` in
            order, the values for every (distance, angle) pair flattened
            row-major; ``None`` if ``img`` is ``None`` or extraction fails
            (the reason is printed).
        """
        if img is None:
            return None

        try:
            glcm = graycomatrix(
                img,
                distances=self.distances,
                angles=self.angles,
                levels=self.GRAY_LEVELS,
                symmetric=True,
                normed=True,
            )

            features = []
            for prop in self.PROPERTIES:
                features.extend(graycoprops(glcm, prop).flatten())

            return np.array(features)
        except Exception as e:
            print(f"Feature extraction error: {str(e)}")
            return None

    def extract_from_folder(self, folder_path, max_samples=None):
        """Extract features from every supported image directly in a folder.

        The folder's own name is used as the label of each image. Files are
        matched by suffix without regard to case and processed in sorted
        order, so results do not depend on the filesystem's listing order.

        Args:
            folder_path: Directory to scan (not recursive). A trailing path
                separator is tolerated.
            max_samples: If truthy and smaller than the number of images
                found, a random subset of that size is drawn without
                replacement via ``np.random.choice``. ``None`` or ``0``
                means "use everything".

        Returns:
            ``(features, labels)`` as NumPy arrays with one entry per image
            that was processed successfully. Both are empty arrays when no
            image is found or none could be processed.
        """
        # normpath strips a trailing separator, which would otherwise make
        # basename return '' and leave every sample unlabeled.
        class_name = os.path.basename(os.path.normpath(folder_path))

        # Match on the lower-cased suffix so '.PNG' / '.JPG' are found on
        # case-sensitive filesystems too; sort for a reproducible order.
        image_paths = sorted(
            path
            for path in glob(os.path.join(folder_path, '*'))
            if os.path.splitext(path)[1].lower() in self.IMAGE_EXTENSIONS
        )

        if not image_paths:
            print(f"Warning: No images found in {folder_path}")
            return np.array([]), np.array([])

        # Apply sampling if requested; tolist() restores plain str paths.
        if max_samples and len(image_paths) > max_samples:
            image_paths = np.random.choice(
                image_paths, max_samples, replace=False
            ).tolist()

        features = []
        labels = []
        for img_path in image_paths:
            # extract_features passes a None image through as None.
            feat = self.extract_features(self.preprocess_xray(img_path))
            if feat is not None:
                features.append(feat)
                labels.append(class_name)

        print(f"Successfully processed {len(features)}/{len(image_paths)} images in {folder_path}")
        return np.array(features), np.array(labels)