""" Module Segmentation: Grid Detection & Cell Extraction. This is the CORE PROBLEM of the pipeline. Real-world EL module images contain a grid of cells that must be individually extracted for defect analysis. Approach: 1. Projection profiles: sum pixel intensities along rows/columns → peaks correspond to cell boundaries (dark gaps between cells) 2. Peak detection with adaptive parameters 3. Spacing analysis: validate peaks using periodicity 4. Busbar filtering: busbars create false peaks — detect and exclude them 5. Cell extraction: crop individual cells from detected grid Handles: - Full modules (6×10, 6×12, etc.) - Half-cut cell modules - Partial/zoomed images - Low-contrast images - Missing grid lines Design decision: Projection-based approach over deep learning because: - No training data needed for grid detection - Deterministic and explainable - Works across all module types without retraining - Fast enough for real-time use """ import cv2 import numpy as np from scipy.signal import find_peaks, medfilt from scipy.fft import fft, fftfreq from typing import List, Tuple, Optional, Dict from dataclasses import dataclass, field @dataclass class CellInfo: """Information about a single extracted cell.""" cell_id: int row: int col: int image: np.ndarray # Extracted cell image (grayscale) bbox: Tuple[int, int, int, int] # (y1, x1, y2, x2) in original image area_pixels: int = 0 def to_dict(self) -> dict: return { "cell_id": self.cell_id, "row": self.row, "col": self.col, "bbox": self.bbox, "area_pixels": self.area_pixels, } class ModuleSegmenter: """ Detect cell grid and extract individual cells from EL module images. The algorithm: 1. Preprocess: CLAHE + blur for consistent contrast 2. Compute row and column projections (inverted: gaps are bright) 3. Find peaks in projections = cell boundaries 4. Validate peaks using expected periodicity 5. Filter busbar false peaks 6. Extract cells using detected grid """ def __init__( self, min_cells_per_row: int = 2, min_cells_per_col: int = 2, peak_prominence_factor: float = 0.15, min_cell_size: int = 30, busbar_width_ratio: float = 2.5, ): """ Args: min_cells_per_row: Minimum expected cells per row min_cells_per_col: Minimum expected cells per column peak_prominence_factor: Fraction of projection range for peak prominence min_cell_size: Minimum cell dimension in pixels busbar_width_ratio: Peaks wider than median × this ratio are busbars """ self.min_cells_per_row = min_cells_per_row self.min_cells_per_col = min_cells_per_col self.peak_prominence_factor = peak_prominence_factor self.min_cell_size = min_cell_size self.busbar_width_ratio = busbar_width_ratio def segment(self, image: np.ndarray) -> List[CellInfo]: """ Main entry point: detect grid and extract cells. Args: image: Grayscale EL image (uint8 or float32) Returns: List of CellInfo objects, one per detected cell. If no grid is detected, returns the whole image as one cell. """ # Ensure grayscale uint8 gray = self._prepare_image(image) h, w = gray.shape # Step 1: Check if this is already a single cell if self._is_single_cell(gray): return [CellInfo( cell_id=1, row=0, col=0, image=gray, bbox=(0, 0, h, w), area_pixels=h * w )] # Step 2: Compute projection profiles row_proj = self._compute_projection(gray, axis=1) # horizontal lines col_proj = self._compute_projection(gray, axis=0) # vertical lines # Step 3: Find grid lines row_peaks = self._find_grid_lines(row_proj, h, axis="row") col_peaks = self._find_grid_lines(col_proj, w, axis="col") # Step 4: Filter busbars (they create wider gaps) row_peaks = self._filter_busbars(row_peaks, row_proj) col_peaks = self._filter_busbars(col_peaks, col_proj) # Step 5: Validate periodicity row_peaks = self._validate_periodicity(row_peaks, h) col_peaks = self._validate_periodicity(col_peaks, w) # Step 6: Extract cells cells = self._extract_cells(gray, row_peaks, col_peaks) if len(cells) == 0: # Fallback: return whole image as one cell cells = [CellInfo( cell_id=1, row=0, col=0, image=gray, bbox=(0, 0, h, w), area_pixels=h * w )] return cells def _prepare_image(self, image: np.ndarray) -> np.ndarray: """Convert to grayscale uint8 and apply light preprocessing.""" if image.ndim == 3: gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) elif image.dtype == np.float32 or image.dtype == np.float64: if image.max() <= 1.0: gray = (image * 255).astype(np.uint8) else: gray = image.astype(np.uint8) else: gray = image.astype(np.uint8) # Light CLAHE to improve contrast for grid detection clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) enhanced = clahe.apply(gray) return enhanced def _is_single_cell(self, gray: np.ndarray) -> bool: """ Heuristic: detect if image is already a single cell (no grid). Single cells typically: - Are roughly square (aspect ratio close to 1) - Have no strong periodic dark gaps - Are smaller than typical module images """ h, w = gray.shape aspect_ratio = max(h, w) / (min(h, w) + 1) # Very small image is likely a single cell if max(h, w) < 200: return True # Check for periodic gaps in both directions row_proj = self._compute_projection(gray, axis=1) col_proj = self._compute_projection(gray, axis=0) # If no clear periodic pattern, likely single cell row_period = self._estimate_period(row_proj) col_period = self._estimate_period(col_proj) if row_period is None and col_period is None: return True # If the estimated period would give < 2 cells, it's a single cell if row_period and h / row_period < 2: if col_period and w / col_period < 2: return True return False def _compute_projection(self, gray: np.ndarray, axis: int) -> np.ndarray: """ Compute intensity projection profile. axis=0: sum along rows → column profile (detect vertical gaps) axis=1: sum along columns → row profile (detect horizontal gaps) We INVERT the projection because gaps between cells are DARK, so gaps become peaks after inversion. """ # Invert: dark gaps become bright inverted = 255 - gray # Sum along axis projection = inverted.astype(np.float64).mean(axis=axis) # Smooth to reduce noise kernel_size = max(3, len(projection) // 100) if kernel_size % 2 == 0: kernel_size += 1 projection = medfilt(projection, kernel_size=kernel_size) return projection def _estimate_period(self, projection: np.ndarray) -> Optional[int]: """ Estimate periodicity of projection using FFT. Returns estimated period in pixels, or None if no clear period. """ n = len(projection) if n < 20: return None # Remove DC component proj_centered = projection - projection.mean() # FFT fft_vals = np.abs(fft(proj_centered)) freqs = fftfreq(n) # Only look at positive frequencies, skip DC pos_mask = freqs > 0 fft_pos = fft_vals[pos_mask] freq_pos = freqs[pos_mask] if len(fft_pos) == 0: return None # Find dominant frequency peak_idx = np.argmax(fft_pos) dominant_freq = freq_pos[peak_idx] if dominant_freq <= 0: return None period = int(1.0 / dominant_freq) # Validate: period should be reasonable (10-50% of image dimension) if period < n * 0.05 or period > n * 0.6: return None return period def _find_grid_lines( self, projection: np.ndarray, dim_size: int, axis: str ) -> np.ndarray: """ Find peaks in projection profile = cell boundaries. Uses adaptive parameters based on projection statistics. """ if len(projection) < 10: return np.array([], dtype=int) # Adaptive parameters proj_range = projection.max() - projection.min() prominence = proj_range * self.peak_prominence_factor # Estimate minimum distance between peaks period = self._estimate_period(projection) if period is not None: min_distance = max(int(period * 0.5), self.min_cell_size) else: # Fallback: assume at least 4 cells min_distance = max(dim_size // 20, self.min_cell_size) # Find peaks peaks, properties = find_peaks( projection, prominence=prominence, distance=min_distance, height=projection.mean(), # peaks must be above average ) # If too few peaks found, try with relaxed parameters if len(peaks) < 2: peaks, properties = find_peaks( projection, prominence=proj_range * 0.05, # much lower threshold distance=max(dim_size // 30, 10), ) return peaks def _filter_busbars( self, peaks: np.ndarray, projection: np.ndarray ) -> np.ndarray: """ Filter out busbar peaks. Busbars create WIDER gaps than cell spacing. We detect them by comparing peak widths to the median width. Strategy: remove peaks whose "width at half prominence" exceeds median_width × busbar_width_ratio. """ if len(peaks) < 3: return peaks # Estimate peak widths widths = [] for peak in peaks: # Find width at half height half_height = (projection[peak] + projection.min()) / 2 # Search left left = peak while left > 0 and projection[left] > half_height: left -= 1 # Search right right = peak while right < len(projection) - 1 and projection[right] > half_height: right += 1 widths.append(right - left) widths = np.array(widths) median_width = np.median(widths) # Keep peaks with reasonable width mask = widths < median_width * self.busbar_width_ratio return peaks[mask] def _validate_periodicity( self, peaks: np.ndarray, dim_size: int ) -> np.ndarray: """ Validate peaks by checking for periodic spacing. Removes outlier peaks that don't fit the dominant spacing pattern. This handles noise-induced false peaks. """ if len(peaks) < 3: return peaks # Compute spacings between consecutive peaks spacings = np.diff(peaks) if len(spacings) == 0: return peaks median_spacing = np.median(spacings) if median_spacing < self.min_cell_size: return peaks # Filter: keep spacings within 50% of median valid_mask = np.ones(len(peaks), dtype=bool) for i in range(len(spacings)): if abs(spacings[i] - median_spacing) > median_spacing * 0.5: # This spacing is suspicious — remove the peak that causes it # Keep the peak that's more consistent with neighbors if i > 0 and i < len(spacings) - 1: prev_ok = abs(spacings[i-1] - median_spacing) < median_spacing * 0.3 if prev_ok: valid_mask[i + 1] = False else: valid_mask[i] = False return peaks[valid_mask] def _extract_cells( self, gray: np.ndarray, row_peaks: np.ndarray, col_peaks: np.ndarray ) -> List[CellInfo]: """ Extract individual cells from detected grid lines. Row peaks = horizontal boundaries Col peaks = vertical boundaries """ h, w = gray.shape cells = [] # Add image boundaries row_bounds = np.concatenate([[0], row_peaks, [h]]) col_bounds = np.concatenate([[0], col_peaks, [w]]) # Remove duplicate/close boundaries row_bounds = self._merge_close_bounds(row_bounds, self.min_cell_size // 2) col_bounds = self._merge_close_bounds(col_bounds, self.min_cell_size // 2) cell_id = 1 for i in range(len(row_bounds) - 1): for j in range(len(col_bounds) - 1): y1, y2 = int(row_bounds[i]), int(row_bounds[i + 1]) x1, x2 = int(col_bounds[j]), int(col_bounds[j + 1]) # Minimum size check if y2 - y1 < self.min_cell_size or x2 - x1 < self.min_cell_size: continue cell_img = gray[y1:y2, x1:x2] # Skip cells that are mostly background (very dark) if cell_img.mean() < 10: continue cells.append(CellInfo( cell_id=cell_id, row=i, col=j, image=cell_img.copy(), bbox=(y1, x1, y2, x2), area_pixels=(y2 - y1) * (x2 - x1), )) cell_id += 1 return cells def _merge_close_bounds( self, bounds: np.ndarray, min_gap: int ) -> np.ndarray: """Merge boundaries that are too close together.""" if len(bounds) <= 1: return bounds merged = [bounds[0]] for b in bounds[1:]: if b - merged[-1] >= min_gap: merged.append(b) else: # Replace with midpoint merged[-1] = (merged[-1] + b) // 2 return np.array(merged) def get_grid_visualization( self, image: np.ndarray, cells: List[CellInfo] ) -> np.ndarray: """ Draw detected grid on image for visualization. Returns BGR image with colored cell boundaries. """ if image.ndim == 2: vis = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) else: vis = image.copy() for cell in cells: y1, x1, y2, x2 = cell.bbox cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 2) cv2.putText( vis, f"C{cell.cell_id}", (x1 + 5, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1 ) return vis def estimate_pixel_to_mm( cell_width_px: int, cell_height_px: int, cell_type: str = "standard", ) -> float: """ Estimate pixel-to-mm conversion factor from cell dimensions. Standard crystalline silicon solar cells: - Full cell: 156mm × 156mm (M2) or 166mm × 166mm (M6) or 182mm × 182mm (M10) - Half-cut cell: 156mm × 78mm (M2) or 166mm × 83mm (M6) Args: cell_width_px: Cell width in pixels cell_height_px: Cell height in pixels cell_type: 'standard' (156mm), 'M6' (166mm), 'M10' (182mm) Returns: Conversion factor: mm per pixel """ cell_sizes_mm = { "standard": 156.0, "M2": 156.0, "M6": 166.0, "M10": 182.0, "M12": 210.0, } physical_size = cell_sizes_mm.get(cell_type, 156.0) # Use the larger dimension (cells are roughly square) max_px = max(cell_width_px, cell_height_px) if max_px == 0: return 1.0 # Fallback return physical_size / max_px