File size: 4,107 Bytes
62bd7f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
import os
from glob import glob
from PIL import Image, UnidentifiedImageError

class GLCMFeatureExtractor:
    """Extract grey-level co-occurrence matrix (GLCM) texture features.

    Images are loaded as greyscale, resized to ``TARGET_SIZE``, min-max
    stretched to the 0-255 range and then summarised with the
    ``graycoprops`` statistics listed in ``TEXTURE_PROPS``, computed for
    every (distance, angle) pair.

    Failures while loading or processing a single image are reported with
    ``print`` and signalled by returning ``None``; they never propagate.
    """

    # Immutable defaults: a fresh list is built per instance in __init__, so
    # instances never share (and cannot corrupt) one another's settings.
    DEFAULT_DISTANCES = (1, 3, 5)
    DEFAULT_ANGLES = (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)

    # Texture statistics, in the order they appear in the feature vector.
    TEXTURE_PROPS = ('contrast', 'dissimilarity', 'homogeneity',
                     'energy', 'correlation', 'ASM')

    # File suffixes (lower-case) picked up by extract_from_folder.
    # NOTE(review): '.dcm' is listed, but the loaders used here are
    # presumably not DICOM readers -- confirm such files actually load.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.dcm', '.tif', '.bmp')

    # (width, height) every image is resized to before feature extraction.
    TARGET_SIZE = (256, 256)

    def __init__(self, distances=None, angles=None):
        """Configure the pixel-pair offsets used for the GLCM.

        Args:
            distances: Pixel-pair distances passed to ``graycomatrix``.
                Defaults to ``[1, 3, 5]``.
            angles: Pixel-pair angles in radians passed to ``graycomatrix``.
                Defaults to ``[0, pi/4, pi/2, 3*pi/4]``.
        """
        self.distances = (
            list(self.DEFAULT_DISTANCES) if distances is None else distances
        )
        self.angles = list(self.DEFAULT_ANGLES) if angles is None else angles

    def preprocess_xray(self, img_path):
        """Load an image as greyscale, resize it and stretch its contrast.

        OpenCV is tried first; if it cannot decode the file, PIL is used as
        a fallback.

        Args:
            img_path: Path (``str`` or path-like) of the image file.

        Returns:
            A ``TARGET_SIZE`` ``np.uint8`` array scaled to 0-255 (all zeros
            for a zero-contrast image), or ``None`` if the image could not be
            loaded or processed (the error is printed).
        """
        try:
            # Accept pathlib.Path and other path-like objects as well as str.
            img_path = os.fspath(img_path)

            # First try with OpenCV
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Fallback to PIL for problematic images
                try:
                    with Image.open(img_path) as pil_img:
                        img = np.array(pil_img.convert('L'))
                except (IOError, UnidentifiedImageError) as e:
                    raise ValueError(f"PIL cannot read image: {img_path}") from e

            # Handle empty images
            if img.size == 0:
                raise ValueError(f"Empty image: {img_path}")

            img = cv2.resize(img, self.TARGET_SIZE)

            # Min-max normalisation to the full 8-bit range.
            img = img.astype(np.float32)
            min_val = np.min(img)
            max_val = np.max(img)
            span = max_val - min_val

            if span < 1e-5:
                # Zero-contrast image: avoid dividing by ~0, return black.
                img = np.zeros_like(img)
            else:
                img = (img - min_val) / span * 255

            return img.astype(np.uint8)
        except Exception as e:
            # Best-effort boundary: one unreadable file must not abort a batch.
            print(f"Error processing {img_path}: {str(e)}")
            return None

    def extract_features(self, img):
        """Compute the GLCM texture feature vector for one image.

        Args:
            img: 2-D ``np.uint8`` image (as produced by ``preprocess_xray``),
                or ``None``.

        Returns:
            A 1-D array holding, for each property in ``TEXTURE_PROPS`` in
            order, the flattened ``graycoprops`` result over all
            (distance, angle) pairs; ``None`` if ``img`` is ``None`` or the
            computation fails (the error is printed).
        """
        if img is None:
            return None

        try:
            glcm = graycomatrix(
                img,
                distances=self.distances,
                angles=self.angles,
                levels=256,
                symmetric=True,
                normed=True,
            )

            return np.concatenate(
                [graycoprops(glcm, prop).ravel() for prop in self.TEXTURE_PROPS]
            )
        except Exception as e:
            print(f"Feature extraction error: {str(e)}")
            return None

    def extract_from_folder(self, folder_path, max_samples=None):
        """Extract features from every supported image directly in a folder.

        The folder's own name is used as the class label for each image.

        Args:
            folder_path: Directory containing the images (not searched
                recursively).
            max_samples: If truthy and smaller than the number of images
                found, a random subset of this size is drawn (without
                replacement) using ``np.random``.

        Returns:
            ``(features, labels)`` as NumPy arrays with one entry per
            successfully processed image. Both are empty arrays when the
            folder contains no supported images.
        """
        features = []
        labels = []

        # normpath drops a trailing separator; without it
        # basename("data/normal/") would be "" and every label empty.
        class_name = os.path.basename(os.path.normpath(folder_path))

        # Match suffixes case-insensitively so e.g. ".PNG" / ".JPG" are not
        # skipped on case-sensitive filesystems. Sorting makes the order (and
        # therefore seeded subsampling below) reproducible.
        image_paths = sorted(
            path
            for path in glob(os.path.join(folder_path, '*'))
            if os.path.splitext(path)[1].lower() in self.IMAGE_EXTENSIONS
            and os.path.isfile(path)
        )

        if not image_paths:
            print(f"Warning: No images found in {folder_path}")
            return np.array([]), np.array([])

        # Apply sampling if requested. Indices are sampled (rather than the
        # paths themselves) so the paths stay plain ``str`` objects.
        if max_samples and len(image_paths) > max_samples:
            chosen = np.random.choice(
                len(image_paths), max_samples, replace=False
            )
            image_paths = [image_paths[i] for i in chosen]

        for img_path in image_paths:
            img = self.preprocess_xray(img_path)
            if img is None:
                continue

            feat = self.extract_features(img)
            if feat is not None:
                features.append(feat)
                labels.append(class_name)

        print(f"Successfully processed {len(features)}/{len(image_paths)} images in {folder_path}")
        return np.array(features), np.array(labels)