Fahimeh Orvati Nia committed on
Commit
b4123b8
·
1 Parent(s): 4768cde

Add sorghum_pipeline code

Browse files
Files changed (39) hide show
  1. sorghum_pipeline/__init__.py +31 -0
  2. sorghum_pipeline/__pycache__/__init__.cpython-312.pyc +0 -0
  3. sorghum_pipeline/__pycache__/config.cpython-312.pyc +0 -0
  4. sorghum_pipeline/__pycache__/pipeline.cpython-312.pyc +0 -0
  5. sorghum_pipeline/config.py +249 -0
  6. sorghum_pipeline/data/__init__.py +15 -0
  7. sorghum_pipeline/data/__pycache__/__init__.cpython-312.pyc +0 -0
  8. sorghum_pipeline/data/__pycache__/loader.cpython-312.pyc +0 -0
  9. sorghum_pipeline/data/__pycache__/mask_handler.cpython-312.pyc +0 -0
  10. sorghum_pipeline/data/__pycache__/preprocessor.cpython-312.pyc +0 -0
  11. sorghum_pipeline/data/loader.py +444 -0
  12. sorghum_pipeline/data/mask_handler.py +296 -0
  13. sorghum_pipeline/data/preprocessor.py +279 -0
  14. sorghum_pipeline/features/__init__.py +21 -0
  15. sorghum_pipeline/features/__pycache__/__init__.cpython-312.pyc +0 -0
  16. sorghum_pipeline/features/__pycache__/morphology.cpython-312.pyc +0 -0
  17. sorghum_pipeline/features/__pycache__/spectral.cpython-312.pyc +0 -0
  18. sorghum_pipeline/features/__pycache__/texture.cpython-312.pyc +0 -0
  19. sorghum_pipeline/features/__pycache__/vegetation.cpython-312.pyc +0 -0
  20. sorghum_pipeline/features/morphology.py +380 -0
  21. sorghum_pipeline/features/spectral.py +383 -0
  22. sorghum_pipeline/features/texture.py +373 -0
  23. sorghum_pipeline/features/vegetation.py +308 -0
  24. sorghum_pipeline/models/__init__.py +10 -0
  25. sorghum_pipeline/models/__pycache__/__init__.cpython-312.pyc +0 -0
  26. sorghum_pipeline/models/__pycache__/dbc_lacunarity.cpython-312.pyc +0 -0
  27. sorghum_pipeline/models/dbc_lacunarity.py +90 -0
  28. sorghum_pipeline/output/__init__.py +13 -0
  29. sorghum_pipeline/output/__pycache__/__init__.cpython-312.pyc +0 -0
  30. sorghum_pipeline/output/__pycache__/manager.cpython-312.pyc +0 -0
  31. sorghum_pipeline/output/manager.py +688 -0
  32. sorghum_pipeline/pipeline.py +1377 -0
  33. sorghum_pipeline/segmentation/__init__.py +12 -0
  34. sorghum_pipeline/segmentation/__pycache__/__init__.cpython-312.pyc +0 -0
  35. sorghum_pipeline/segmentation/__pycache__/advanced_occlusion_handler.cpython-312.pyc +0 -0
  36. sorghum_pipeline/segmentation/__pycache__/leaf_occlusion_handler.cpython-312.pyc +0 -0
  37. sorghum_pipeline/segmentation/__pycache__/manager.cpython-312.pyc +0 -0
  38. sorghum_pipeline/segmentation/__pycache__/occlusion_handler.cpython-312.pyc +0 -0
  39. sorghum_pipeline/segmentation/manager.py +309 -0
sorghum_pipeline/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Sorghum Plant Phenotyping Pipeline.

End-to-end analysis of sorghum plant images, covering:

- data loading and preprocessing
- image segmentation and masking
- feature extraction (texture, morphology, vegetation indices)
- results visualization and export

Author: Fahime Horvatinia
Version: 2.0.0
"""

__version__ = "2.0.0"
__author__ = "Fahime Horvatinia"

# Re-export the public entry points so callers can simply do
# ``from sorghum_pipeline import SorghumPipeline``.
from .pipeline import SorghumPipeline
from .config import Config
from .data import DataLoader
from .features import TextureExtractor, VegetationIndexExtractor, MorphologyExtractor
from .output import OutputManager

__all__ = [
    "SorghumPipeline",
    "Config",
    "DataLoader",
    "TextureExtractor",
    "VegetationIndexExtractor",
    "MorphologyExtractor",
    "OutputManager"
]
sorghum_pipeline/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (943 Bytes). View file
 
sorghum_pipeline/__pycache__/config.cpython-312.pyc ADDED
Binary file (13 kB). View file
 
sorghum_pipeline/__pycache__/pipeline.cpython-312.pyc ADDED
Binary file (66.9 kB). View file
 
sorghum_pipeline/config.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration management for the Sorghum Pipeline.
3
+
4
+ This module handles all configuration settings, paths, and parameters
5
+ used throughout the pipeline.
6
+ """
7
+
8
+ import os
9
+ import yaml
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional
12
+ from dataclasses import dataclass, field
13
+
14
+
15
@dataclass
class Paths:
    """Filesystem locations used by the pipeline.

    Required: input and output folders. Optional: bounding-box and label
    directories. All provided paths are normalized to absolute paths on
    construction.
    """
    input_folder: str
    output_folder: str
    boundingbox_dir: Optional[str] = None
    labels_folder: Optional[str] = None

    def __post_init__(self):
        """Ensure all paths are absolute where provided."""
        self.input_folder = os.path.abspath(self.input_folder)
        self.output_folder = os.path.abspath(self.output_folder)
        # Optional directories are only normalized when set (non-empty).
        for attr in ("boundingbox_dir", "labels_folder"):
            value = getattr(self, attr)
            if value:
                setattr(self, attr, os.path.abspath(value))
+
32
+
33
@dataclass
class ProcessingParams:
    """Tunable parameters for image processing and feature extraction.

    Defaults reflect the values used for the sorghum dataset; all of them
    can be overridden from the YAML configuration file.
    """

    # --- Image processing ---
    target_size: tuple = (1024, 1024)        # images are resized to this (H, W)
    gaussian_blur_kernel: int = 5
    morphology_kernel_size: int = 7
    min_component_area: int = 1000           # smaller components are discarded

    # --- Segmentation ---
    segmentation_threshold: float = 0.5
    max_components: int = 10

    # --- Texture analysis ---
    lbp_points: int = 8                      # local binary pattern sample points
    lbp_radius: int = 1
    hog_orientations: int = 9
    hog_pixels_per_cell: tuple = (8, 8)
    hog_cells_per_block: tuple = (2, 2)
    lacunarity_window: int = 15
    ehd_threshold: float = 0.3               # edge histogram descriptor threshold
    angle_resolution: int = 45

    # --- Vegetation indices ---
    epsilon: float = 1e-10                   # guards against division by zero
    soil_factor: float = 0.16                # soil-adjustment factor (e.g. SAVI-style)

    # --- Morphology ---
    pixel_to_cm: float = 0.1099609375        # camera calibration: cm per pixel
    # default_factory keeps each instance's list independent (mutable default)
    prune_sizes: list = field(default_factory=lambda: [200, 100, 50, 30, 10])
63
+
64
+
65
@dataclass
class OutputSettings:
    """Controls what gets written to disk and where.

    Flags select which artifact kinds are saved; the *_dir fields name the
    per-plant subdirectories created by the output manager.
    """

    # What to save
    save_images: bool = True
    save_plots: bool = True
    save_metadata: bool = True

    # Rendering quality / format
    image_dpi: int = 150
    plot_dpi: int = 100
    image_format: str = "png"

    # Subdirectory names (relative to each plant's output folder)
    segmentation_dir: str = "segmentation"
    features_dir: str = "features"
    texture_dir: str = "texture"
    morphology_dir: str = "morphology"
    vegetation_dir: str = "vegetation_indices"
    analysis_dir: str = "analysis"
82
+
83
+
84
@dataclass
class ModelSettings:
    """Settings for the ML segmentation model (Hugging Face hosted)."""

    device: str = "auto"                      # "auto" picks cuda if available, else cpu
    model_name: str = "briaai/RMBG-2.0"       # background-removal model id
    batch_size: int = 1
    # Required for models whose code lives in the hub repo itself.
    trust_remote_code: bool = True
    cache_dir: str = ""                       # empty string -> library default cache
    local_files_only: bool = False            # True forces offline operation
93
+
94
+
95
class Config:
    """Main configuration class for the Sorghum Pipeline.

    Aggregates the four settings groups (paths, processing, output, model)
    and serializes them to / from YAML.
    """

    # Tuple-valued processing parameters. YAML has no tuple type: dumping a
    # tuple emits a `!!python/tuple` tag that yaml.safe_load refuses to read
    # back, so these are written as lists and restored to tuples on load.
    _TUPLE_PROCESSING_KEYS = ('target_size', 'hog_pixels_per_cell', 'hog_cells_per_block')

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize configuration.

        Args:
            config_path: Path to YAML configuration file. If None, uses defaults.
        """
        self.paths = Paths(
            input_folder="",
            output_folder="",
            boundingbox_dir=""
        )
        self.processing = ProcessingParams()
        self.output = OutputSettings()
        self.model = ModelSettings()

        if config_path:
            self.load_from_file(config_path)

    @staticmethod
    def _apply_section(target: Any, section: Optional[Dict[str, Any]],
                       tuple_keys: tuple = ()) -> None:
        """Copy known keys from a parsed YAML section onto a settings object.

        Unknown keys are silently ignored; keys named in *tuple_keys* that
        arrive as lists (YAML's only sequence type) are converted to tuples.
        """
        if not section:
            return
        for key, value in section.items():
            if hasattr(target, key):
                if key in tuple_keys and isinstance(value, list):
                    value = tuple(value)
                setattr(target, key, value)

    def load_from_file(self, config_path: str) -> None:
        """Load configuration from a YAML file, overriding current values.

        Raises:
            FileNotFoundError: If *config_path* does not exist.
        """
        config_path = Path(config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_path}")

        with open(config_path, 'r') as f:
            # safe_load returns None for an empty file; treat as "no overrides"
            # instead of crashing on `'paths' in None`.
            config_data = yaml.safe_load(f) or {}

        # Paths are rebuilt wholesale so __post_init__ re-normalizes them.
        if 'paths' in config_data:
            self.paths = Paths(**config_data['paths'])

        self._apply_section(self.processing, config_data.get('processing'),
                            tuple_keys=self._TUPLE_PROCESSING_KEYS)
        self._apply_section(self.output, config_data.get('output'))
        self._apply_section(self.model, config_data.get('model'))

    def save_to_file(self, config_path: str) -> None:
        """Save current configuration to a YAML file readable by load_from_file."""
        config_data = {
            'paths': {
                'input_folder': self.paths.input_folder,
                'output_folder': self.paths.output_folder,
                'boundingbox_dir': self.paths.boundingbox_dir,
                'labels_folder': self.paths.labels_folder
            },
            'processing': {
                # Tuples become lists so yaml.safe_load can read the file back.
                'target_size': list(self.processing.target_size),
                'gaussian_blur_kernel': self.processing.gaussian_blur_kernel,
                'morphology_kernel_size': self.processing.morphology_kernel_size,
                'min_component_area': self.processing.min_component_area,
                'segmentation_threshold': self.processing.segmentation_threshold,
                'max_components': self.processing.max_components,
                'lbp_points': self.processing.lbp_points,
                'lbp_radius': self.processing.lbp_radius,
                'hog_orientations': self.processing.hog_orientations,
                'hog_pixels_per_cell': list(self.processing.hog_pixels_per_cell),
                'hog_cells_per_block': list(self.processing.hog_cells_per_block),
                'lacunarity_window': self.processing.lacunarity_window,
                'ehd_threshold': self.processing.ehd_threshold,
                'angle_resolution': self.processing.angle_resolution,
                'epsilon': self.processing.epsilon,
                'soil_factor': self.processing.soil_factor,
                'pixel_to_cm': self.processing.pixel_to_cm,
                'prune_sizes': self.processing.prune_sizes
            },
            'output': {
                'save_images': self.output.save_images,
                'save_plots': self.output.save_plots,
                'save_metadata': self.output.save_metadata,
                'image_dpi': self.output.image_dpi,
                'plot_dpi': self.output.plot_dpi,
                'image_format': self.output.image_format,
                'segmentation_dir': self.output.segmentation_dir,
                'features_dir': self.output.features_dir,
                'texture_dir': self.output.texture_dir,
                'morphology_dir': self.output.morphology_dir,
                'vegetation_dir': self.output.vegetation_dir,
                'analysis_dir': self.output.analysis_dir
            },
            'model': {
                'device': self.model.device,
                'model_name': self.model.model_name,
                'batch_size': self.model.batch_size,
                'trust_remote_code': self.model.trust_remote_code,
                'cache_dir': self.model.cache_dir,
                'local_files_only': self.model.local_files_only,
            }
        }

        with open(config_path, 'w') as f:
            yaml.dump(config_data, f, default_flow_style=False, indent=2)

    def get_device(self) -> str:
        """Get the appropriate device for processing.

        Resolves "auto" to "cuda" when available, otherwise "cpu"; any other
        configured value is returned as-is. torch is imported lazily so this
        module stays importable without it until device resolution is needed.
        """
        if self.model.device == "auto":
            import torch
            return "cuda" if torch.cuda.is_available() else "cpu"
        return self.model.device

    def create_output_directories(self, base_path: str) -> None:
        """Ensure base output directory exists only.

        Subdirectories are created per plant in the output manager.
        """
        base_path = Path(base_path)
        base_path.mkdir(parents=True, exist_ok=True)

    def validate(self) -> bool:
        """Validate configuration settings.

        Returns:
            True when all checks pass.

        Raises:
            FileNotFoundError: If a configured directory does not exist.
            ValueError: If a processing parameter is out of range.
        """
        # Check if input directory exists
        if not os.path.exists(self.paths.input_folder):
            raise FileNotFoundError(f"Input folder does not exist: {self.paths.input_folder}")

        # Bounding box directory is optional; only validate when configured.
        if self.paths.boundingbox_dir and not os.path.exists(self.paths.boundingbox_dir):
            raise FileNotFoundError(f"Bounding box directory does not exist: {self.paths.boundingbox_dir}")

        # Validate processing parameters
        if self.processing.target_size[0] <= 0 or self.processing.target_size[1] <= 0:
            raise ValueError("Target size must be positive")

        if self.processing.segmentation_threshold < 0 or self.processing.segmentation_threshold > 1:
            raise ValueError("Segmentation threshold must be between 0 and 1")

        return True
237
+
238
+
239
def create_default_config(output_path: str) -> None:
    """Write a configuration file populated with the project defaults.

    Args:
        output_path: Destination path for the generated YAML file.
    """
    cfg = Config()
    # Default dataset layout relative to the working directory.
    cfg.paths = Paths(
        input_folder="Sorghum_dataset",
        output_folder="Sorghum_pipeline_Results",
        boundingbox_dir="boundingbox",
        labels_folder="labels",
    )
    cfg.save_to_file(output_path)
    print(f"Default configuration created at: {output_path}")
sorghum_pipeline/data/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Data loading and preprocessing modules.

Everything related to getting plant images into the pipeline lives here:

- raw image loading (DataLoader)
- image preprocessing (ImagePreprocessor)
- mask creation and handling (MaskHandler)
"""

from .loader import DataLoader
from .preprocessor import ImagePreprocessor
from .mask_handler import MaskHandler

__all__ = ["DataLoader", "ImagePreprocessor", "MaskHandler"]
sorghum_pipeline/data/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (577 Bytes). View file
 
sorghum_pipeline/data/__pycache__/loader.cpython-312.pyc ADDED
Binary file (21.9 kB). View file
 
sorghum_pipeline/data/__pycache__/mask_handler.cpython-312.pyc ADDED
Binary file (11.7 kB). View file
 
sorghum_pipeline/data/__pycache__/preprocessor.cpython-312.pyc ADDED
Binary file (11.4 kB). View file
 
sorghum_pipeline/data/loader.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data loading functionality for the Sorghum Pipeline.
3
+
4
+ This module handles loading raw images, managing plant data,
5
+ and organizing data according to the pipeline requirements.
6
+ """
7
+
8
+ import os
9
+ import glob
10
+ import json
11
+ from pathlib import Path
12
+ from typing import Dict, List, Tuple, Optional, Any
13
+ from PIL import Image
14
+ import numpy as np
15
+ import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class DataLoader:
    """Handles loading and organizing plant image data.

    Supports two dataset layouts: a parent folder containing per-date
    subfolders, or a single date folder containing per-plant subfolders.
    Frame selection is driven by the class-level rule tables below.
    """

    # Plants to ignore completely (empty by default)
    IGNORE_PLANTS = set()

    # Plants where you want exactly one frame from their own folder
    EXACT_FRAME = {
        4: 7, 5: 5, 7: 5, 12: 5, 13: 5, 18: 7, 19: 2, 20: 3,
        24: 6, 25: 5, 26: 5, 30: 8, 37: 7
    }

    # Plants where you want to borrow a frame from a different plant folder:
    # plant_id -> (source_plant_id, frame_number)
    BORROW_FRAME = {
        14: (13, 5), 15: (14, 5), 16: (15, 5), 33: (34, 7),
        34: (35, 7), 35: (35, 8), 36: (36, 6)
    }

    # Overrides provided by user: preferred frame per target plant name.
    # These take priority over EXACT_FRAME / BORROW_FRAME.
    FRAME_OVERRIDE_BY_NAME = {
        'plant1': 9, 'plant2': 10, 'plant3': 9, 'plant5': 7, 'plant6': 9, 'plant8': 5,
        'plant7': 9, 'plant10': 9, 'plant11': 9, 'plant12': 9,
        'plant13': 10, 'plant14': 8, 'plant15': 11, 'plant19': 4, 'plant20': 7,
        'plant21': 9, 'plant22': 10, 'plant25': 4, 'plant26': 2, 'plant27': 10, 'plant28': 9, 'plant29': 2,
        'plant30': 9, 'plant31': 10, 'plant32': 9, 'plant33': 8,
        'plant35': 9, 'plant36': 4, 'plant38': 9, 'plant39': 9, 'plant41': 9,
        'plant42': 6, 'plant43': 10, 'plant44': 9, 'plant45': 7,
        'plant47': 10, 'plant48': 11,
    }

    # Substitutes provided by user: map target plant name -> source plant name
    PLANT_SUBSTITUTES_BY_NAME = {
        'plant16': 'plant15', 'plant15': 'plant14', 'plant14': 'plant13',
        'plant13': 'plant12', 'plant33': 'plant34', 'plant34': 'plant35',
        'plant24': 'plant25', 'plant25': 'plant25', 'plant35': 'plant36',
        'plant36': 'plant37', 'plant37': 'plant37', 'plant44': 'plant43',
        'plant45': 'plant44',
    }

    def __init__(self, input_folder: str, debug: bool = False, include_ignored: bool = False, strict_loader: bool = False, excluded_dates: Optional[List[str]] = None):
        """
        Initialize the data loader.

        Args:
            input_folder: Path to the input dataset folder
            debug: Enable debug logging
            include_ignored: If True, plants in IGNORE_PLANTS are loaded anyway
            strict_loader: If True, only honor explicit name-based overrides
                and substitutions (no legacy borrow/exact special-casing)
            excluded_dates: Date folder names (with dashes) to skip entirely

        Raises:
            FileNotFoundError: If input_folder does not exist.
        """
        self.input_folder = Path(input_folder)
        self.debug = debug
        self.include_ignored = include_ignored
        self.strict_loader = strict_loader

        if not self.input_folder.exists():
            raise FileNotFoundError(f"Input folder does not exist: {input_folder}")
        # Normalize excluded dates as a set of folder names (with dashes)
        self.excluded_dates = set(excluded_dates or [])

    def load_selected_frames(self) -> Dict[str, Dict[str, Any]]:
        """
        Load selected frames according to predefined rules.
        If strict_loader is True, load only frame numbers from the plant's own folder (no borrowing/special picks).

        Returns:
            Dictionary with plant data organized by key format: "YYYY_MM_DD_plantX_frameY"
        """
        logger.info("Loading selected frames from dataset...")
        plants = {}

        # Detect if input folder is a direct date folder (contains plant folders)
        first_items = list(self.input_folder.iterdir())
        has_plant_folders = any(item.is_dir() and item.name.startswith('plant') for item in first_items)

        def choose_frame_and_source(pid: int) -> Tuple[int, str]:
            if self.strict_loader:
                # In strict mode, honor explicit frame overrides AND substitution of source plant
                plant_name_local = f"plant{pid}"
                frame_num = self.FRAME_OVERRIDE_BY_NAME.get(
                    plant_name_local,
                    self.EXACT_FRAME.get(pid, 8)
                )
                source_plant = self.PLANT_SUBSTITUTES_BY_NAME.get(plant_name_local, plant_name_local)
                return frame_num, source_plant
            # Original behavior
            frame_num = self._get_frame_number(pid)
            source_plant = self._get_source_plant(pid)
            return frame_num, source_plant

        if has_plant_folders:
            # Direct date folder structure
            date_name = self.input_folder.name
            date_path = self.input_folder
            for plant_name in sorted(os.listdir(date_path)):
                plant_path = date_path / plant_name
                if not plant_path.is_dir():
                    continue
                try:
                    plant_id = int(plant_name.replace("plant", ""))
                except ValueError:
                    continue
                if (plant_id in self.IGNORE_PLANTS) and (not self.include_ignored):
                    if self.debug:
                        logger.debug(f"Ignoring plant {plant_id}")
                    continue
                frame_num, source_plant = choose_frame_and_source(plant_id)
                frame_data = self._load_single_frame(date_path, source_plant, frame_num, plant_name)
                if frame_data:
                    key = f"{date_name.replace('-', '_')}_{plant_name}_frame{frame_num}"
                    plants[key] = frame_data
                    logger.debug(f"Loaded {key}")
        else:
            # Parent folder structure with date subfolders
            for date_name in sorted(os.listdir(self.input_folder)):
                date_path = self.input_folder / date_name
                if not date_path.is_dir():
                    continue
                if date_name in self.excluded_dates:
                    logger.info(f"Skipping excluded date: {date_name}")
                    continue
                for plant_name in sorted(os.listdir(date_path)):
                    plant_path = date_path / plant_name
                    if not plant_path.is_dir():
                        continue
                    try:
                        plant_id = int(plant_name.replace("plant", ""))
                    except ValueError:
                        continue
                    if (plant_id in self.IGNORE_PLANTS) and (not self.include_ignored):
                        if self.debug:
                            logger.debug(f"Ignoring plant {plant_id}")
                        continue
                    frame_num, source_plant = choose_frame_and_source(plant_id)
                    frame_data = self._load_single_frame(date_path, source_plant, frame_num, plant_name)
                    if frame_data:
                        key = f"{date_name.replace('-', '_')}_{plant_name}_frame{frame_num}"
                        plants[key] = frame_data
                        logger.debug(f"Loaded {key}")

        logger.info(f"Successfully loaded {len(plants)} plant frames")
        return plants

    def load_all_frames(self) -> Dict[str, Dict[str, Any]]:
        """
        Load all available frames for each plant.

        Returns:
            Dictionary with all plant frames
        """
        logger.info("Loading all frames from dataset...")
        plants = {}

        # Check if we're directly in a date folder (contains plant folders)
        # or in a parent folder (contains date folders)
        first_items = list(self.input_folder.iterdir())
        has_plant_folders = any(item.is_dir() and item.name.startswith('plant') for item in first_items)

        if has_plant_folders:
            # We're directly in a date folder
            logger.info("Detected direct date folder structure")
            date_name = self.input_folder.name
            self._load_plants_from_date_folder(self.input_folder, date_name, plants)
        else:
            # We're in a parent folder with date subfolders
            logger.info("Detected parent folder structure")
            for date_name in sorted(os.listdir(self.input_folder)):
                date_path = self.input_folder / date_name
                if not date_path.is_dir():
                    continue
                if date_name in self.excluded_dates:
                    logger.info(f"Skipping excluded date: {date_name}")
                    continue

                logger.info(f"Processing date: {date_name}")
                self._load_plants_from_date_folder(date_path, date_name, plants)

        logger.info(f"Successfully loaded {len(plants)} plant frames")
        return plants

    def _load_plants_from_date_folder(self, date_path: Path, date_name: str, plants: Dict[str, Dict[str, Any]]) -> None:
        """Load every frame of every (non-ignored) plant in a date folder into *plants*."""
        for plant_name in sorted(os.listdir(date_path)):
            plant_path = date_path / plant_name
            if not plant_path.is_dir():
                continue

            # Extract plant ID
            try:
                plant_id = int(plant_name.replace("plant", ""))
            except ValueError:
                logger.warning(f"Could not extract plant ID from {plant_name}")
                continue

            # Skip ignored plants
            if (plant_id in self.IGNORE_PLANTS) and (not self.include_ignored):
                logger.info(f"Skipping ignored plant {plant_id}")
                continue

            logger.info(f"Processing plant {plant_id}")

            # Load all frames for this plant
            pattern = str(plant_path / f"{plant_name}_frame*.tif")
            frame_files = sorted(glob.glob(pattern))
            logger.info(f"Found {len(frame_files)} frame files for {plant_name}")

            for frame_path in frame_files:
                frame_data = self._load_frame_from_path(frame_path, plant_name)
                if frame_data:
                    frame_id = Path(frame_path).stem.split("_frame")[-1]
                    key = f"{date_name.replace('-', '_')}_{plant_name}_frame{frame_id}"
                    plants[key] = frame_data
                    logger.debug(f"Loaded frame: {key}")
                else:
                    logger.warning(f"Failed to load frame: {frame_path}")

    def load_single_plant(self, date: str, plant: str, frame: int) -> Optional[Dict[str, Any]]:
        """
        Load a specific plant frame.

        Args:
            date: Date string (e.g., "2025-02-05")
            plant: Plant name (e.g., "plant1")
            frame: Frame number

        Returns:
            Plant data dictionary or None if not found
        """
        date_path = self.input_folder / date
        if not date_path.exists():
            logger.error(f"Date folder not found: {date}")
            return None

        plant_path = date_path / plant
        if not plant_path.exists():
            logger.error(f"Plant folder not found: {plant}")
            return None

        filename = f"{plant}_frame{frame}.tif"
        frame_path = plant_path / filename

        return self._load_frame_from_path(str(frame_path), plant)

    def _get_frame_number(self, plant_id: int) -> int:
        """Get the frame number for a plant ID."""
        plant_name = f"plant{plant_id}"
        # Highest priority: explicit overrides by plant name
        if plant_name in self.FRAME_OVERRIDE_BY_NAME:
            return int(self.FRAME_OVERRIDE_BY_NAME[plant_name])
        # Next: original exact/borrow rules
        if plant_id in self.EXACT_FRAME:
            return self.EXACT_FRAME[plant_id]
        elif plant_id in self.BORROW_FRAME:
            return self.BORROW_FRAME[plant_id][1]
        else:
            return 8  # Default frame

    def _get_source_plant(self, plant_id: int) -> str:
        """Get the source plant name for a plant ID."""
        plant_name = f"plant{plant_id}"
        # Highest priority: explicit substitutes by plant name
        if plant_name in self.PLANT_SUBSTITUTES_BY_NAME:
            return self.PLANT_SUBSTITUTES_BY_NAME[plant_name]
        # Next: original borrow rules
        if plant_id in self.BORROW_FRAME:
            source_id = self.BORROW_FRAME[plant_id][0]
            return f"plant{source_id}"
        else:
            return f"plant{plant_id}"

    def _load_single_frame(self, date_path: Path, source_plant: str,
                           frame_num: int, target_plant: str) -> Optional[Dict[str, Any]]:
        """Load a single frame from the specified path.

        The frame file is read from *source_plant*'s folder but attributed
        to *target_plant* (supports the borrow/substitute rules).
        """
        filename = f"{source_plant}_frame{frame_num}.tif"
        frame_path = date_path / source_plant / filename

        if not frame_path.exists():
            if self.debug:
                logger.warning(f"Frame not found: {frame_path}")
            return None

        return self._load_frame_from_path(str(frame_path), target_plant)

    def _load_frame_from_path(self, frame_path: str, plant_name: str) -> Optional[Dict[str, Any]]:
        """Load frame data from a file path.

        Returns None (and logs) on any load failure. NOTE: PIL opens the
        image lazily; pixel data is only read when the image is first used.
        """
        try:
            logger.debug(f"Attempting to load: {frame_path}")
            image = Image.open(frame_path)
            filename = Path(frame_path).name
            # Fixed: previously logged a literal "(unknown)" placeholder
            # instead of the filename computed above.
            logger.debug(f"Successfully loaded image: {filename}, size: {image.size}")

            return {
                "raw_image": (image, filename),
                "plant_name": plant_name,
                "file_path": frame_path
            }
        except Exception as e:
            logger.error(f"Failed to load {frame_path}: {e}")
            return None

    def load_bounding_boxes(self, bbox_dir: str) -> Dict[str, Optional[Tuple[int, int, int, int]]]:
        """
        Load bounding box data from JSON files.

        Args:
            bbox_dir: Directory containing bounding box JSON files

        Returns:
            Dictionary mapping plant names to bounding box coordinates
            (x1, y1, x2, y2), or None when a file has no rectangle shape.

        Raises:
            FileNotFoundError: If bbox_dir does not exist.
        """
        bbox_path = Path(bbox_dir)
        if not bbox_path.exists():
            raise FileNotFoundError(f"Bounding box directory not found: {bbox_dir}")

        bbox_lookup = {}

        for json_file in bbox_path.glob("*.json"):
            stem = json_file.stem
            # Normalize stems like plant_33_new -> plant33
            if stem.startswith('plant_'):
                parts = stem.split('_')
                try:
                    idx = next(i for i, p in enumerate(parts) if p.isdigit())
                    plant_id = f"plant{parts[idx]}"
                except Exception:
                    plant_id = stem.replace('_', '')
            else:
                plant_id = stem
            try:
                with open(json_file, 'r') as f:
                    data = json.load(f)

                shapes = data.get('shapes', [])
                # Prefer rectangle labeled 'sorghum' (case-insensitive), else first rectangle
                def _is_sorghum_label(s: dict) -> bool:
                    for key in ('label', 'name', 'text'):
                        val = s.get(key)
                        if isinstance(val, str) and val.lower() == 'sorghum':
                            return True
                    return False
                rect = next((s for s in shapes if s.get('shape_type') == 'rectangle' and _is_sorghum_label(s)), None)
                if rect is None:
                    rect = next((s for s in shapes if s.get('shape_type') == 'rectangle'), None)

                if rect:
                    (x1, y1), (x2, y2) = rect['points']
                    # Clamp origin to >= 0; 1e9 is an arbitrary large upper
                    # bound (actual clamping to image size happens downstream).
                    bbox_lookup[plant_id] = (
                        int(max(0, x1)),
                        int(max(0, y1)),
                        int(min(1e9, x2)),
                        int(min(1e9, y2))
                    )
                else:
                    bbox_lookup[plant_id] = None

            except Exception as e:
                logger.error(f"Failed to load bounding box {json_file}: {e}")

        logger.info(f"Loaded {len(bbox_lookup)} bounding boxes")
        return bbox_lookup

    def load_hand_labels(self, labels_dir: str) -> Dict[str, np.ndarray]:
        """
        Load hand-labeled masks from JSON files.

        Args:
            labels_dir: Directory containing label JSON files

        Returns:
            Dictionary mapping plant names to mask arrays
        """
        labels_path = Path(labels_dir)
        if not labels_path.exists():
            logger.warning(f"Labels directory not found: {labels_dir}")
            return {}

        masks = {}

        for json_file in labels_path.glob("*.json"):
            plant_id = json_file.stem
            try:
                with open(json_file, 'r') as f:
                    data = json.load(f)

                # Create mask from shapes (assuming we have image dimensions)
                # This would need to be adapted based on your label format
                mask = self._create_mask_from_shapes(data)
                if mask is not None:
                    masks[plant_id] = mask

            except Exception as e:
                logger.error(f"Failed to load label {json_file}: {e}")

        logger.info(f"Loaded {len(masks)} hand labels")
        return masks

    def _create_mask_from_shapes(self, data: Dict) -> Optional[np.ndarray]:
        """Create a mask array from shape data."""
        # This is a placeholder - implement based on your label format
        # For now, return None
        return None

    def validate_data(self, plants: Dict[str, Dict[str, Any]]) -> bool:
        """
        Validate loaded plant data.

        Args:
            plants: Dictionary of plant data

        Returns:
            True if data is valid, False otherwise
        """
        if not plants:
            logger.error("No plant data loaded")
            return False

        for key, data in plants.items():
            if "raw_image" not in data:
                logger.error(f"Missing raw_image in {key}")
                return False

            image, filename = data["raw_image"]
            if not isinstance(image, Image.Image):
                logger.error(f"Invalid image type in {key}")
                return False

        logger.info("Data validation passed")
        return True
sorghum_pipeline/data/mask_handler.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask handling functionality for the Sorghum Pipeline.
3
+
4
+ This module handles mask creation, processing, and validation
5
+ for plant segmentation tasks.
6
+ """
7
+
8
+ import numpy as np
9
+ import cv2
10
+ from typing import Dict, Tuple, Optional, List
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class MaskHandler:
    """Handles mask creation, processing, and validation.

    All masks are 2-D uint8 arrays with foreground encoded as 255 and
    background as 0 unless stated otherwise.
    """

    def __init__(self, min_area: int = 1000, kernel_size: int = 7):
        """
        Initialize the mask handler.

        Args:
            min_area: Minimum area (pixels) a connected component must have
                to survive `preprocess_mask`.
            kernel_size: Kernel size for morphological opening.
        """
        self.min_area = min_area
        self.kernel_size = kernel_size

    def create_bounding_box_mask(self, image_shape: Tuple[int, int],
                                 bbox: Tuple[int, int, int, int]) -> np.ndarray:
        """
        Create a binary mask from bounding box coordinates.

        Args:
            image_shape: Shape of the image (height, width).
            bbox: Bounding box coordinates (x1, y1, x2, y2).

        Returns:
            uint8 mask with 255 inside the (clamped) box, 0 elsewhere.
        """
        h, w = image_shape[:2]
        mask = np.zeros((h, w), dtype=np.uint8)

        x1, y1, x2, y2 = bbox
        # Clamp coordinates to image bounds so slicing cannot wrap around
        # with negative indices.
        x1 = max(0, min(w, x1))
        y1 = max(0, min(h, y1))
        x2 = max(0, min(w, x2))
        y2 = max(0, min(h, y2))

        mask[y1:y2, x1:x2] = 255
        return mask

    def preprocess_mask(self, mask: np.ndarray) -> np.ndarray:
        """
        Clean a mask: binarize, open morphologically, drop small components.

        Args:
            mask: Input mask (any non-zero value counts as foreground).
                May also be a tuple whose first element is the mask.

        Returns:
            Cleaned uint8 mask, or None if the input was None.
        """
        if mask is None:
            return None

        # Some callers pass (mask, extra) tuples; unwrap the array.
        if isinstance(mask, tuple):
            mask = mask[0]

        # Normalize to strict 0/255 binary format.
        mask = ((mask.astype(np.int32) > 0).astype(np.uint8)) * 255

        # Morphological opening to remove speckle noise.
        kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE,
            (self.kernel_size, self.kernel_size)
        )
        opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # Keep only connected components at least `min_area` pixels large.
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
            opened, connectivity=8
        )

        clean_mask = np.zeros_like(opened)
        for label in range(1, num_labels):  # label 0 is the background
            if stats[label, cv2.CC_STAT_AREA] >= self.min_area:
                clean_mask[labels == label] = 255

        return clean_mask

    def keep_largest_component(self, mask: np.ndarray) -> np.ndarray:
        """
        Keep only the largest connected component in the mask.

        Args:
            mask: Input binary mask.

        Returns:
            Mask containing only the largest component (255/0), or the
            input unchanged when it has no foreground components.
        """
        if mask is None:
            return None

        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask, 8)

        if num_labels <= 1:
            return mask

        # Largest component by area, excluding the background label 0.
        areas = stats[1:, cv2.CC_STAT_AREA]
        largest_label = 1 + np.argmax(areas)

        return (labels == largest_label).astype(np.uint8) * 255

    def apply_mask_to_image(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
        """
        Apply a binary mask to an image (background pixels become 0).

        Args:
            image: Input image.
            mask: Binary mask; None returns the image unchanged.

        Returns:
            Masked image.
        """
        if mask is None:
            return image

        return cv2.bitwise_and(image, image, mask=mask)

    def create_overlay(self, image: np.ndarray, mask: np.ndarray,
                       color: Tuple[int, int, int] = (0, 255, 0),
                       alpha: float = 0.5) -> np.ndarray:
        """
        Blend a solid-color rendering of the mask onto the image.

        Args:
            image: Base image (BGR).
            mask: Binary mask; only pixels equal to 255 are colored.
            color: Overlay color (B, G, R).
            alpha: Overlay transparency in [0, 1].

        Returns:
            Image with mask overlay.
        """
        overlay = image.copy()
        overlay[mask == 255] = color
        return cv2.addWeighted(image, 1.0 - alpha, overlay, alpha, 0)

    def get_mask_properties(self, mask: np.ndarray) -> Dict[str, float]:
        """
        Compute basic geometric properties of the mask.

        Args:
            mask: Mask; values > 127 are treated as foreground.

        Returns:
            Dictionary with area, perimeter, bbox_area, aspect_ratio and
            coverage (fraction of image covered); empty dict for None input.
        """
        if mask is None:
            return {}

        binary_mask = (mask > 127).astype(np.uint8)
        area = np.sum(binary_mask)

        # Find contours once and derive perimeter/bbox from the LARGEST one.
        # (The previous implementation ran findContours three times and
        # measured contours[0], which is not necessarily the largest blob.)
        contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            largest = max(contours, key=cv2.contourArea)
            perimeter = cv2.arcLength(largest, True)
            x, y, w, h = cv2.boundingRect(largest)
            bbox_area = w * h
            aspect_ratio = w / h if h > 0 else 0
        else:
            perimeter = 0
            bbox_area = 0
            aspect_ratio = 0

        return {
            "area": float(area),
            "perimeter": float(perimeter),
            "bbox_area": float(bbox_area),
            "aspect_ratio": float(aspect_ratio),
            "coverage": float(area) / (mask.shape[0] * mask.shape[1]) if mask.size > 0 else 0.0
        }

    def validate_mask(self, mask: np.ndarray) -> bool:
        """
        Validate mask format and content.

        A valid mask is a 2-D uint8/bool ndarray with at least one
        foreground pixel.

        Args:
            mask: Mask to validate.

        Returns:
            True if valid, False otherwise.
        """
        if mask is None:
            return False

        if not isinstance(mask, np.ndarray):
            return False

        if mask.ndim != 2:
            return False

        if mask.dtype not in [np.uint8, np.bool_]:
            return False

        # An all-background mask is considered invalid (nothing segmented).
        if np.sum(mask > 0) == 0:
            logger.warning("Mask has no foreground pixels")
            return False

        return True

    def resize_mask(self, mask: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
        """
        Resize mask to target size.

        Args:
            mask: Input mask.
            target_size: Target size (width, height).

        Returns:
            Resized mask (nearest-neighbor keeps values strictly binary).
        """
        if mask is None:
            return None

        return cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)

    def dilate_mask(self, mask: np.ndarray, kernel_size: int = 5) -> np.ndarray:
        """
        Dilate mask to expand foreground regions.

        Args:
            mask: Input mask.
            kernel_size: Size of elliptical dilation kernel.

        Returns:
            Dilated mask.
        """
        if mask is None:
            return None

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
        return cv2.dilate(mask, kernel, iterations=1)

    def erode_mask(self, mask: np.ndarray, kernel_size: int = 5) -> np.ndarray:
        """
        Erode mask to shrink foreground regions.

        Args:
            mask: Input mask.
            kernel_size: Size of elliptical erosion kernel.

        Returns:
            Eroded mask.
        """
        if mask is None:
            return None

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
        return cv2.erode(mask, kernel, iterations=1)

    def fill_holes(self, mask: np.ndarray) -> np.ndarray:
        """
        Fill holes in the mask by filling its external contours.

        Args:
            mask: Input mask.

        Returns:
            Mask with interior holes filled.
        """
        if mask is None:
            return None

        # External contours only: everything inside an outer boundary,
        # including holes, gets filled.
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        filled_mask = np.zeros_like(mask)
        cv2.fillPoly(filled_mask, contours, 255)

        return filled_mask
sorghum_pipeline/data/preprocessor.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Image preprocessing functionality for the Sorghum Pipeline.
3
+
4
+ This module handles image preprocessing, composite creation,
5
+ and basic image transformations.
6
+ """
7
+
8
+ import numpy as np
9
+ import cv2
10
+ from PIL import Image
11
+ from typing import Dict, Tuple, Any, Optional
12
+ from itertools import product
13
+ import logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class ImagePreprocessor:
    """Handles image preprocessing and composite creation."""

    def __init__(self, target_size: Optional[Tuple[int, int]] = None):
        """
        Initialize the image preprocessor.

        Args:
            target_size: Default target size for image resizing
                (width, height); None disables resizing.
        """
        self.target_size = target_size

    def convert_to_uint8(self, arr: np.ndarray) -> np.ndarray:
        """
        Convert array to uint8 with min-max normalization to 0-255.

        NaN and infinite values are replaced with 0 before normalization.

        Args:
            arr: Input array.

        Returns:
            Normalized uint8 array (all zeros for a constant input).
        """
        arr = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)

        # Use np.ptp(arr) instead of the ndarray.ptp() method: the method
        # was removed in NumPy 2.0. Compute the range once instead of twice.
        value_range = np.ptp(arr)
        if value_range > 0:
            normalized = (arr - arr.min()) / (value_range + 1e-6) * 255
        else:
            normalized = np.zeros_like(arr)

        return np.clip(normalized, 0, 255).astype(np.uint8)

    def process_raw_image(self, pil_img: "Image.Image") -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
        """
        Process a raw 4-band image into a composite and spectral bands.

        The RAW frame is a 2x2 grid of equally sized tiles holding, in
        row-major order: green, red, red_edge, nir.

        Args:
            pil_img: PIL Image object containing 4-band tiled data.

        Returns:
            Tuple of (pseudo-RGB composite uint8 image, dict of float
            spectral bands keyed "green"/"red"/"red_edge"/"nir").
        """
        # Split the 4-band RAW into tiles and stack them.
        d = pil_img.size[0] // 2
        boxes = [
            (j, i, j + d, i + d)
            for i, j in product(
                range(0, pil_img.height, d),
                range(0, pil_img.width, d)
            )
        ]

        stack = np.stack([
            np.array(pil_img.crop(box), dtype=float)
            for box in boxes
        ], axis=-1)

        # Bands come in order: [green, red, red_edge, nir].
        green, red, red_edge, nir = np.split(stack, 4, axis=-1)

        # Build pseudo-RGB composite as (green, red_edge, red).
        composite = np.concatenate([green, red_edge, red], axis=-1)
        composite_uint8 = self.convert_to_uint8(composite)

        spectral_bands = {
            "green": green,
            "red": red,
            "red_edge": red_edge,
            "nir": nir
        }

        return composite_uint8, spectral_bands

    def create_composites(self, plants: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
        """
        Create composites for all plants in the dataset.

        Stores a "composite" image and "spectral_stack" dict in each plant
        entry; entries without raw images are skipped with a warning.

        Args:
            plants: Dictionary of plant data.

        Returns:
            Updated plant data with composites and spectral stacks.
        """
        logger.info("Creating composites for all plants...")

        for key, pdata in plants.items():
            try:
                # Find the PIL Image (single image, or first of a list).
                if "raw_image" in pdata:
                    image, _ = pdata["raw_image"]
                elif "raw_images" in pdata and pdata["raw_images"]:
                    image, _ = pdata["raw_images"][0]
                else:
                    logger.warning(f"No raw image found for {key}")
                    continue

                composite, spectral_stack = self.process_raw_image(image)

                pdata["composite"] = composite
                pdata["spectral_stack"] = spectral_stack

                logger.debug(f"Created composite for {key}")

            except Exception as e:
                logger.error(f"Failed to create composite for {key}: {e}")
                continue

        logger.info("Composite creation completed")
        return plants

    def resize_image(self, image: np.ndarray, target_size: Optional[Tuple[int, int]] = None) -> np.ndarray:
        """
        Resize image to target size.

        Args:
            image: Input image.
            target_size: Target size (width, height). If None, uses
                self.target_size; if that is also None, the image is
                returned unchanged.

        Returns:
            Resized image.
        """
        if target_size is None:
            target_size = self.target_size

        if target_size is None:
            return image

        return cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)

    def normalize_image(self, image: np.ndarray, method: str = "minmax") -> np.ndarray:
        """
        Normalize image using the specified method.

        Args:
            image: Input image.
            method: "minmax" (to [0, 1]), "zscore" (mean 0 / std 1), or
                "robust" (scaled by the interquartile range).

        Returns:
            Normalized image (zeros when the image is constant).

        Raises:
            ValueError: If `method` is not one of the supported names.
        """
        if method == "minmax":
            if image.dtype == np.uint8:
                return image.astype(np.float32) / 255.0
            img_min, img_max = image.min(), image.max()
            if img_max > img_min:
                return (image - img_min) / (img_max - img_min)
            return np.zeros_like(image, dtype=np.float32)

        elif method == "zscore":
            mean, std = image.mean(), image.std()
            if std > 0:
                return (image - mean) / std
            return np.zeros_like(image, dtype=np.float32)

        elif method == "robust":
            q25, q75 = np.percentile(image, [25, 75])
            if q75 > q25:
                return (image - q25) / (q75 - q25)
            return np.zeros_like(image, dtype=np.float32)

        else:
            raise ValueError(f"Unknown normalization method: {method}")

    def apply_gaussian_blur(self, image: np.ndarray, kernel_size: int = 5) -> np.ndarray:
        """
        Apply Gaussian blur to image.

        Args:
            image: Input image.
            kernel_size: Size of Gaussian kernel (bumped to the next odd
                number if even, as OpenCV requires odd sizes).

        Returns:
            Blurred image.
        """
        if kernel_size % 2 == 0:
            kernel_size += 1

        return cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)

    def apply_sharpening(self, image: np.ndarray) -> np.ndarray:
        """
        Apply a 3x3 unsharp-style sharpening filter to the image.

        Args:
            image: Input image.

        Returns:
            Sharpened image.
        """
        kernel = np.array([
            [0, -1, 0],
            [-1, 5, -1],
            [0, -1, 0]
        ])

        return cv2.filter2D(image, -1, kernel)

    def enhance_contrast(self, image: np.ndarray, alpha: float = 1.2, beta: int = 15) -> np.ndarray:
        """
        Enhance image contrast/brightness via a linear transform.

        Args:
            image: Input image.
            alpha: Contrast control (1.0 = no change).
            beta: Brightness control (0 = no change).

        Returns:
            Enhanced image.
        """
        return cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

    def create_overlay(self, base_image: np.ndarray, mask: np.ndarray,
                       color: Tuple[int, int, int] = (0, 255, 0),
                       alpha: float = 0.5) -> np.ndarray:
        """
        Blend a solid-color rendering of the mask onto the base image.

        Args:
            base_image: Base image (BGR).
            mask: Binary mask; only pixels equal to 255 are colored.
            color: Overlay color (B, G, R).
            alpha: Overlay transparency in [0, 1].

        Returns:
            Image with overlay.
        """
        overlay = base_image.copy()
        overlay[mask == 255] = color
        return cv2.addWeighted(base_image, 1.0 - alpha, overlay, alpha, 0)

    def validate_composite(self, composite: np.ndarray) -> bool:
        """
        Validate a composite image (must be a 3-channel uint8 ndarray).

        Args:
            composite: Composite image to validate.

        Returns:
            True if valid, False otherwise.
        """
        if composite is None:
            return False

        if not isinstance(composite, np.ndarray):
            return False

        if composite.ndim != 3 or composite.shape[2] != 3:
            return False

        if composite.dtype != np.uint8:
            return False

        return True
sorghum_pipeline/features/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feature extraction modules for the Sorghum Pipeline.
3
+
4
+ This package contains all feature extraction functionality including:
5
+ - Texture features (LBP, HOG, Lacunarity, EHD)
6
+ - Vegetation indices
7
+ - Morphological features
8
+ - Spectral features
9
+ """
10
+
11
+ from .texture import TextureExtractor
12
+ from .vegetation import VegetationIndexExtractor
13
+ from .morphology import MorphologyExtractor
14
+ from .spectral import SpectralExtractor
15
+
16
+ __all__ = [
17
+ "TextureExtractor",
18
+ "VegetationIndexExtractor",
19
+ "MorphologyExtractor",
20
+ "SpectralExtractor"
21
+ ]
sorghum_pipeline/features/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (714 Bytes). View file
 
sorghum_pipeline/features/__pycache__/morphology.cpython-312.pyc ADDED
Binary file (18.6 kB). View file
 
sorghum_pipeline/features/__pycache__/spectral.cpython-312.pyc ADDED
Binary file (18 kB). View file
 
sorghum_pipeline/features/__pycache__/texture.cpython-312.pyc ADDED
Binary file (18.7 kB). View file
 
sorghum_pipeline/features/__pycache__/vegetation.cpython-312.pyc ADDED
Binary file (25.1 kB). View file
 
sorghum_pipeline/features/morphology.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Morphological feature extraction for the Sorghum Pipeline.
3
+
4
+ This module handles extraction of morphological features using PlantCV
5
+ and other computer vision techniques.
6
+ """
7
+
8
+ import numpy as np
9
+ import cv2
10
+ import contextlib
11
+ import sys
12
+ from typing import Dict, Any, Optional, List, Tuple
13
+ import logging
14
+
15
+ # Try to import PlantCV, but don't fail if not available
16
+ try:
17
+ from plantcv import plantcv as pcv
18
+ PLANT_CV_AVAILABLE = True
19
+ except ImportError:
20
+ PLANT_CV_AVAILABLE = False
21
+ logger.warning("PlantCV not available. Morphological features will be limited.")
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class MorphologyExtractor:
    """Extracts morphological features from plant images.

    Uses PlantCV skeleton analysis when available, otherwise falls back
    to a pure-OpenCV implementation.
    """

    def __init__(self, pixel_to_cm: float = 0.1099609375, prune_sizes: List[int] = None):
        """
        Initialize morphology extractor.

        Args:
            pixel_to_cm: Conversion factor from pixels to centimeters.
            prune_sizes: Pruning sizes applied in order to the skeleton;
                defaults to [200, 100, 50, 30, 10].
        """
        self.pixel_to_cm = pixel_to_cm
        self.prune_sizes = prune_sizes or [200, 100, 50, 30, 10]

        if PLANT_CV_AVAILABLE:
            # Configure PlantCV global rendering/debug parameters once.
            pcv.params.debug = None
            pcv.params.text_size = 0.7
            pcv.params.text_thickness = 2
            pcv.params.line_thickness = 3
            pcv.params.dpi = 100

    def extract_morphology_features(self, image: np.ndarray, mask: np.ndarray) -> Dict[str, Any]:
        """
        Extract morphological features from plant image and mask.

        Args:
            image: Plant image (BGR format).
            mask: Binary mask of the plant.

        Returns:
            Dictionary with keys 'traits' (scalar measurements), 'images'
            (intermediate arrays) and 'success' (bool).
        """
        features = {
            'traits': {},
            'images': {},
            'success': False
        }

        try:
            clean_mask = self._preprocess_mask(mask)
            if clean_mask is None:
                logger.warning("Failed to preprocess mask")
                return features

            # Contour-based measurements always run.
            features['traits'].update(self._extract_basic_features(clean_mask))

            # Skeleton measurements use PlantCV when present, OpenCV otherwise.
            if PLANT_CV_AVAILABLE:
                extra = self._extract_skeleton_features(image, clean_mask)
            else:
                extra = self._extract_opencv_features(image, clean_mask)
            features['traits'].update(extra['traits'])
            features['images'].update(extra['images'])

            features['success'] = True
            logger.debug("Morphological features extracted successfully")

        except Exception as e:
            logger.error(f"Morphological feature extraction failed: {e}")

        return features

    def _preprocess_mask(self, mask: np.ndarray) -> Optional[np.ndarray]:
        """Binarize, open, and drop components < 1000 px for analysis."""
        if mask is None:
            return None

        # Some callers pass (mask, extra) tuples; unwrap the array.
        if isinstance(mask, tuple):
            mask = mask[0]

        # Normalize to strict 0/255 binary format.
        mask = ((mask.astype(np.int32) > 0).astype(np.uint8)) * 255

        # Morphological opening to remove speckle noise.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
        opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # Remove small connected components (label 0 is the background).
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(opened, connectivity=8)
        clean_mask = np.zeros_like(opened)

        for label in range(1, num_labels):
            if stats[label, cv2.CC_STAT_AREA] >= 1000:
                clean_mask[labels == label] = 255

        return clean_mask

    def _extract_basic_features(self, mask: np.ndarray) -> Dict[str, float]:
        """Extract contour-based morphological features using OpenCV."""
        features = {}

        try:
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                return features

            largest_contour = max(contours, key=cv2.contourArea)

            area = cv2.contourArea(largest_contour)
            perimeter = cv2.arcLength(largest_contour, True)

            x, y, w, h = cv2.boundingRect(largest_contour)
            bbox_area = w * h

            # cv2.fitEllipse needs at least 5 points; otherwise fall back
            # to the bounding-box sides as axis estimates.
            if len(largest_contour) >= 5:
                _center, axes, _angle = cv2.fitEllipse(largest_contour)
                major_axis = max(axes)
                minor_axis = min(axes)
            else:
                major_axis = max(w, h)
                minor_axis = min(w, h)

            # Convert pixel measurements to centimeters.
            features['area_cm2'] = area * (self.pixel_to_cm ** 2)
            features['perimeter_cm'] = perimeter * self.pixel_to_cm
            features['width_cm'] = w * self.pixel_to_cm
            features['height_cm'] = h * self.pixel_to_cm
            features['bbox_area_cm2'] = bbox_area * (self.pixel_to_cm ** 2)
            features['major_axis_cm'] = major_axis * self.pixel_to_cm
            features['minor_axis_cm'] = minor_axis * self.pixel_to_cm
            features['aspect_ratio'] = w / h if h > 0 else 0
            features['elongation'] = major_axis / minor_axis if minor_axis > 0 else 0
            features['circularity'] = (4 * np.pi * area) / (perimeter ** 2) if perimeter > 0 else 0
            # NOTE(review): this 'solidity' is area/bbox_area (usually called
            # "extent"); 'convexity' below is the textbook solidity. Key names
            # are kept for downstream compatibility.
            features['solidity'] = area / bbox_area if bbox_area > 0 else 0

            hull = cv2.convexHull(largest_contour)
            hull_area = cv2.contourArea(hull)
            features['convexity'] = area / hull_area if hull_area > 0 else 0

        except Exception as e:
            logger.error(f"Basic feature extraction failed: {e}")

        return features

    def _extract_skeleton_features(self, image: np.ndarray, mask: np.ndarray) -> Dict[str, Any]:
        """Extract skeleton-based features using PlantCV."""
        features = {'traits': {}, 'images': {}}

        if not PLANT_CV_AVAILABLE:
            return features

        try:
            # Suppress noisy PlantCV console output during analysis.
            with contextlib.redirect_stdout(self._FilteredStream(sys.stdout)), \
                 contextlib.redirect_stderr(self._FilteredStream(sys.stderr)):

                skeleton = pcv.morphology.skeletonize(mask=mask)
                features['images']['skeleton'] = skeleton

                # Prune progressively with the configured sizes.
                pruned_skel = skeleton
                for size in self.prune_sizes:
                    pruned_skel, _, _ = pcv.morphology.prune(
                        skel_img=pruned_skel, size=size, mask=mask
                    )

                features['images']['pruned_skeleton'] = pruned_skel

                branch_pts = pcv.morphology.find_branch_pts(pruned_skel, mask)
                features['images']['branch_points'] = branch_pts

                try:
                    tip_pts = pcv.morphology.find_tips(pruned_skel, mask)
                    features['images']['tip_points'] = tip_pts
                except Exception as e:
                    logger.warning(f"Tip detection failed: {e}")

                # Sort skeleton segments into leaves and stems.
                try:
                    leaf_obj, stem_obj = pcv.morphology.segment_sort(
                        pruned_skel, [], mask
                    )
                    features['traits']['num_leaves'] = len(leaf_obj)
                    features['traits']['num_stems'] = len(stem_obj)
                except Exception as e:
                    logger.warning(f"Object segmentation failed: {e}")
                    features['traits']['num_leaves'] = 0
                    features['traits']['num_stems'] = 0

                # Whole-plant size analysis; lengths are converted to cm.
                try:
                    labeled_mask, n_labels = pcv.create_labels(mask)
                    size_analysis = pcv.analyze.size(image, labeled_mask, n_labels, label="default")
                    features['images']['size_analysis'] = size_analysis

                    obs = pcv.outputs.observations.get("default_1", {})
                    for trait, info in obs.items():
                        if trait not in ["in_bounds", "object_in_frame"]:
                            val = info.get("value", None)
                            if val is not None:
                                if trait == "area":
                                    val = val * (self.pixel_to_cm ** 2)
                                elif trait in ["perimeter", "width", "height", "longest_path",
                                               "ellipse_major_axis", "ellipse_minor_axis"]:
                                    val = val * self.pixel_to_cm
                                features['traits'][trait] = val

                except Exception as e:
                    logger.warning(f"Size analysis failed: {e}")

        except Exception as e:
            logger.error(f"Skeleton feature extraction failed: {e}")

        return features

    def _extract_opencv_features(self, image: np.ndarray, mask: np.ndarray) -> Dict[str, Any]:
        """Extract skeleton features using only OpenCV (PlantCV fallback)."""
        features = {'traits': {}, 'images': {}}

        try:
            skeleton = self._create_skeleton_opencv(mask)
            features['images']['skeleton'] = skeleton

            branch_points = self._find_branch_points_opencv(skeleton)
            features['images']['branch_points'] = branch_points
            features['traits']['num_branches'] = len(branch_points)

            endpoints = self._find_endpoints_opencv(skeleton)
            features['images']['endpoints'] = endpoints
            features['traits']['num_endpoints'] = len(endpoints)

            # Skeleton length = number of skeleton pixels.
            skeleton_length = np.sum(skeleton > 0)
            features['traits']['skeleton_length_pixels'] = skeleton_length
            features['traits']['skeleton_length_cm'] = skeleton_length * self.pixel_to_cm

        except Exception as e:
            logger.error(f"OpenCV feature extraction failed: {e}")

        return features

    def _create_skeleton_opencv(self, mask: np.ndarray) -> np.ndarray:
        """Create a 0/255 skeleton via iterative morphological thinning."""
        binary = (mask > 0).astype(np.uint8)

        skeleton = np.zeros_like(binary)
        element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))

        # Classic erode/open-subtract thinning loop; stops when the image
        # is fully eroded.
        while True:
            eroded = cv2.erode(binary, element)
            temp = cv2.dilate(eroded, element)
            temp = cv2.subtract(binary, temp)
            skeleton = cv2.bitwise_or(skeleton, temp)
            binary = eroded.copy()

            if cv2.countNonZero(binary) == 0:
                break

        return skeleton * 255

    def _find_branch_points_opencv(self, skeleton: np.ndarray) -> List[Tuple[int, int]]:
        """Find branch points (skeleton pixels with >= 3 skeleton neighbors)."""
        # Count neighbors on a 0/1 image with a 16-bit output. The original
        # filtered the 0/255 skeleton into uint8, which saturates at 255 and
        # made the neighbor-count thresholds meaningless.
        binary = (skeleton > 0).astype(np.uint8)
        kernel = np.ones((3, 3), dtype=np.uint8)
        kernel[1, 1] = 0  # Don't count the center pixel

        neighbor_count = cv2.filter2D(binary, cv2.CV_16S, kernel)

        ys, xs = np.where((binary > 0) & (neighbor_count >= 3))
        return list(zip(xs, ys))  # (x, y) format

    def _find_endpoints_opencv(self, skeleton: np.ndarray) -> List[Tuple[int, int]]:
        """Find endpoints (skeleton pixels with exactly 1 skeleton neighbor)."""
        # Same 0/1 + 16-bit counting fix as _find_branch_points_opencv;
        # with the old 0/255 input the == 1 test never matched anything.
        binary = (skeleton > 0).astype(np.uint8)
        kernel = np.ones((3, 3), dtype=np.uint8)
        kernel[1, 1] = 0  # Don't count the center pixel

        neighbor_count = cv2.filter2D(binary, cv2.CV_16S, kernel)

        ys, xs = np.where((binary > 0) & (neighbor_count == 1))
        return list(zip(xs, ys))  # (x, y) format

    class _FilteredStream:
        """Wrap a stream and drop known-noisy PlantCV messages."""

        def __init__(self, stream):
            self.stream = stream

        def write(self, msg):
            skip = ("got pruned", "Slope of contour", "cannot be plotted")
            if not any(s in msg for s in skip):
                self.stream.write(msg)

        def flush(self):
            # Best effort: the wrapped stream may already be closed.
            try:
                self.stream.flush()
            except Exception:
                pass

    def create_morphology_visualization(self, image: np.ndarray, mask: np.ndarray,
                                        features: Dict[str, Any]) -> np.ndarray:
        """
        Create a visualization of morphological features.

        Draws the mask outline (green), bounding box (blue), skeleton
        (red) and branch points (yellow) onto a copy of the image.

        Args:
            image: Original image.
            mask: Binary mask.
            features: Extracted features (from extract_morphology_features).

        Returns:
            Visualization image; the original image on failure.
        """
        try:
            vis = image.copy()

            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            cv2.drawContours(vis, contours, -1, (0, 255, 0), 2)

            if contours:
                x, y, w, h = cv2.boundingRect(contours[0])
                cv2.rectangle(vis, (x, y), (x + w, y + h), (255, 0, 0), 2)

            if 'skeleton' in features.get('images', {}):
                skeleton = features['images']['skeleton']
                vis[skeleton > 0] = [0, 0, 255]  # Red skeleton

            if 'branch_points' in features.get('images', {}):
                branch_img = features['images']['branch_points']
                vis[branch_img > 0] = [255, 255, 0]  # Yellow branch points

            return vis

        except Exception as e:
            logger.error(f"Visualization creation failed: {e}")
            return image
sorghum_pipeline/features/spectral.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Spectral feature extraction for the Sorghum Pipeline.
3
+
4
+ This module handles extraction of spectral features and analysis
5
+ of multispectral data.
6
+ """
7
+
8
+ import numpy as np
9
+ import cv2
10
+ from sklearn.decomposition import PCA
11
+ from typing import Dict, Any, Optional, List, Tuple
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class SpectralExtractor:
18
+ """Extracts spectral features from multispectral data."""
19
+
20
+ def __init__(self, n_components: int = 3):
21
+ """
22
+ Initialize spectral extractor.
23
+
24
+ Args:
25
+ n_components: Number of PCA components to extract
26
+ """
27
+ self.n_components = n_components
28
+
29
+ def extract_spectral_features(self, spectral_stack: Dict[str, np.ndarray],
30
+ mask: Optional[np.ndarray] = None) -> Dict[str, Any]:
31
+ """
32
+ Extract spectral features from multispectral data.
33
+
34
+ Args:
35
+ spectral_stack: Dictionary of spectral bands
36
+ mask: Optional binary mask
37
+
38
+ Returns:
39
+ Dictionary containing spectral features
40
+ """
41
+ features = {}
42
+
43
+ try:
44
+ # Extract individual band features
45
+ features['band_features'] = self._extract_band_features(spectral_stack, mask)
46
+
47
+ # Extract PCA features
48
+ features['pca_features'] = self._extract_pca_features(spectral_stack, mask)
49
+
50
+ # Extract spectral indices
51
+ features['spectral_indices'] = self._extract_spectral_indices(spectral_stack, mask)
52
+
53
+ # Extract texture features from spectral bands
54
+ features['spectral_texture'] = self._extract_spectral_texture(spectral_stack, mask)
55
+
56
+ logger.debug("Spectral features extracted successfully")
57
+
58
+ except Exception as e:
59
+ logger.error(f"Spectral feature extraction failed: {e}")
60
+
61
+ return features
62
+
63
+ def _extract_band_features(self, spectral_stack: Dict[str, np.ndarray],
64
+ mask: Optional[np.ndarray] = None) -> Dict[str, Dict[str, float]]:
65
+ """Extract features from individual spectral bands."""
66
+ band_features = {}
67
+
68
+ for band_name, band_data in spectral_stack.items():
69
+ try:
70
+ # Squeeze to 2D if needed
71
+ if band_data.ndim > 2:
72
+ band_data = band_data.squeeze()
73
+
74
+ # Apply mask if provided
75
+ if mask is not None and mask.shape == band_data.shape:
76
+ masked_data = np.where(mask > 0, band_data, np.nan)
77
+ else:
78
+ masked_data = band_data
79
+
80
+ # Compute statistics
81
+ valid_data = masked_data[~np.isnan(masked_data)]
82
+ if len(valid_data) > 0:
83
+ band_features[band_name] = {
84
+ 'mean': float(np.mean(valid_data)),
85
+ 'std': float(np.std(valid_data)),
86
+ 'min': float(np.min(valid_data)),
87
+ 'max': float(np.max(valid_data)),
88
+ 'median': float(np.median(valid_data)),
89
+ 'q25': float(np.percentile(valid_data, 25)),
90
+ 'q75': float(np.percentile(valid_data, 75)),
91
+ 'skewness': float(self._compute_skewness(valid_data)),
92
+ 'kurtosis': float(self._compute_kurtosis(valid_data)),
93
+ 'entropy': float(self._compute_entropy(valid_data))
94
+ }
95
+ else:
96
+ band_features[band_name] = {
97
+ 'mean': 0.0, 'std': 0.0, 'min': 0.0, 'max': 0.0,
98
+ 'median': 0.0, 'q25': 0.0, 'q75': 0.0,
99
+ 'skewness': 0.0, 'kurtosis': 0.0, 'entropy': 0.0
100
+ }
101
+
102
+ except Exception as e:
103
+ logger.error(f"Band feature extraction failed for {band_name}: {e}")
104
+ band_features[band_name] = {}
105
+
106
+ return band_features
107
+
108
+ def _extract_pca_features(self, spectral_stack: Dict[str, np.ndarray],
109
+ mask: Optional[np.ndarray] = None) -> Dict[str, Any]:
110
+ """Extract PCA features from spectral data."""
111
+ try:
112
+ # Stack all bands
113
+ band_names = ['nir', 'red_edge', 'red', 'green']
114
+ band_data = []
115
+
116
+ for band_name in band_names:
117
+ if band_name in spectral_stack:
118
+ arr = spectral_stack[band_name].squeeze().astype(float)
119
+ if mask is not None and mask.shape == arr.shape:
120
+ arr = np.where(mask > 0, arr, np.nan)
121
+ band_data.append(arr)
122
+
123
+ if not band_data:
124
+ return {}
125
+
126
+ # Stack bands
127
+ full_stack = np.stack(band_data, axis=-1)
128
+ h, w, c = full_stack.shape
129
+
130
+ # Reshape for PCA
131
+ flat_data = full_stack.reshape(-1, c)
132
+ valid_mask = ~np.isnan(flat_data).any(axis=1)
133
+
134
+ if valid_mask.sum() == 0:
135
+ return {}
136
+
137
+ # Apply PCA
138
+ valid_data = flat_data[valid_mask]
139
+ pca = PCA(n_components=min(self.n_components, valid_data.shape[1]))
140
+ pca_result = pca.fit_transform(valid_data)
141
+
142
+ # Create full result array
143
+ full_result = np.full((h * w, self.n_components), np.nan)
144
+ full_result[valid_mask] = pca_result
145
+
146
+ # Reshape back to image dimensions
147
+ pca_components = {}
148
+ for i in range(self.n_components):
149
+ component = full_result[:, i].reshape(h, w)
150
+ pca_components[f'pca_{i+1}'] = component
151
+
152
+ # Compute statistics for this component
153
+ valid_component = component[~np.isnan(component)]
154
+ if len(valid_component) > 0:
155
+ pca_components[f'pca_{i+1}_stats'] = {
156
+ 'mean': float(np.mean(valid_component)),
157
+ 'std': float(np.std(valid_component)),
158
+ 'min': float(np.min(valid_component)),
159
+ 'max': float(np.max(valid_component))
160
+ }
161
+
162
+ # Add PCA metadata
163
+ pca_components['explained_variance_ratio'] = pca.explained_variance_ratio_.tolist()
164
+ pca_components['total_variance_explained'] = float(np.sum(pca.explained_variance_ratio_))
165
+
166
+ return pca_components
167
+
168
+ except Exception as e:
169
+ logger.error(f"PCA feature extraction failed: {e}")
170
+ return {}
171
+
172
+ def _extract_spectral_indices(self, spectral_stack: Dict[str, np.ndarray],
173
+ mask: Optional[np.ndarray] = None) -> Dict[str, np.ndarray]:
174
+ """Extract basic spectral indices."""
175
+ indices = {}
176
+
177
+ try:
178
+ # Get required bands
179
+ nir = spectral_stack.get('nir', None)
180
+ red = spectral_stack.get('red', None)
181
+ green = spectral_stack.get('green', None)
182
+ red_edge = spectral_stack.get('red_edge', None)
183
+
184
+ if nir is not None:
185
+ nir = nir.squeeze().astype(float)
186
+ if red is not None:
187
+ red = red.squeeze().astype(float)
188
+ if green is not None:
189
+ green = green.squeeze().astype(float)
190
+ if red_edge is not None:
191
+ red_edge = red_edge.squeeze().astype(float)
192
+
193
+ # Apply mask
194
+ if mask is not None:
195
+ if nir is not None and mask.shape == nir.shape:
196
+ nir = np.where(mask > 0, nir, np.nan)
197
+ if red is not None and mask.shape == red.shape:
198
+ red = np.where(mask > 0, red, np.nan)
199
+ if green is not None and mask.shape == green.shape:
200
+ green = np.where(mask > 0, green, np.nan)
201
+ if red_edge is not None and mask.shape == red_edge.shape:
202
+ red_edge = np.where(mask > 0, red_edge, np.nan)
203
+
204
+ # Compute basic indices
205
+ if nir is not None and red is not None:
206
+ indices['nir_red_ratio'] = nir / (red + 1e-10)
207
+ indices['nir_red_diff'] = nir - red
208
+
209
+ if nir is not None and green is not None:
210
+ indices['nir_green_ratio'] = nir / (green + 1e-10)
211
+ indices['nir_green_diff'] = nir - green
212
+
213
+ if red is not None and green is not None:
214
+ indices['red_green_ratio'] = red / (green + 1e-10)
215
+ indices['red_green_diff'] = red - green
216
+
217
+ if nir is not None and red_edge is not None:
218
+ indices['nir_red_edge_ratio'] = nir / (red_edge + 1e-10)
219
+ indices['nir_red_edge_diff'] = nir - red_edge
220
+
221
+ # Compute band ratios
222
+ if nir is not None and red is not None and green is not None:
223
+ indices['nir_red_green_sum'] = nir + red + green
224
+ indices['nir_red_green_mean'] = (nir + red + green) / 3
225
+
226
+ except Exception as e:
227
+ logger.error(f"Spectral index extraction failed: {e}")
228
+
229
+ return indices
230
+
231
    def _extract_spectral_texture(self, spectral_stack: Dict[str, np.ndarray],
                                  mask: Optional[np.ndarray] = None) -> Dict[str, Any]:
        """Extract texture features from spectral bands.

        Each band is squeezed to 2-D, masked (when the mask shape matches),
        min-max scaled to uint8 over its valid range, and handed to
        ``TextureExtractor.extract_all_texture_features``.  A band that fails
        yields an empty dict; if the texture module itself cannot be
        imported, the whole result is empty and a warning is logged.
        """
        texture_features = {}

        try:
            # Imported lazily so this module still loads without texture.py.
            from .texture import TextureExtractor

            texture_extractor = TextureExtractor()

            for band_name, band_data in spectral_stack.items():
                try:
                    # Prepare grayscale image
                    gray_data = band_data.squeeze().astype(float)

                    # Apply mask (NaN marks excluded pixels; only when shapes agree)
                    if mask is not None and mask.shape == gray_data.shape:
                        gray_data = np.where(mask > 0, gray_data, np.nan)

                    # Normalize to 0-255 over the valid (non-NaN) value range
                    valid_data = gray_data[~np.isnan(gray_data)]
                    if len(valid_data) > 0:
                        m, M = np.min(valid_data), np.max(valid_data)
                        if M > m:
                            # NOTE(review): NaN pixels are cast straight to
                            # uint8 here (typically ending up as 0) — confirm
                            # that is acceptable for downstream texture stats.
                            normalized = ((gray_data - m) / (M - m) * 255).astype(np.uint8)
                        else:
                            # Constant band: nothing to scale.
                            normalized = np.zeros_like(gray_data, dtype=np.uint8)
                    else:
                        # No valid pixels at all.
                        normalized = np.zeros_like(gray_data, dtype=np.uint8)

                    # Extract texture features
                    band_texture = texture_extractor.extract_all_texture_features(normalized)
                    texture_features[band_name] = band_texture

                except Exception as e:
                    logger.error(f"Spectral texture extraction failed for {band_name}: {e}")
                    texture_features[band_name] = {}

        except ImportError:
            logger.warning("TextureExtractor not available for spectral texture analysis")

        return texture_features
273
+
274
+ def _compute_skewness(self, data: np.ndarray) -> float:
275
+ """Compute skewness of data."""
276
+ if len(data) < 3:
277
+ return 0.0
278
+
279
+ mean = np.mean(data)
280
+ std = np.std(data)
281
+ if std == 0:
282
+ return 0.0
283
+
284
+ return np.mean(((data - mean) / std) ** 3)
285
+
286
+ def _compute_kurtosis(self, data: np.ndarray) -> float:
287
+ """Compute kurtosis of data."""
288
+ if len(data) < 4:
289
+ return 0.0
290
+
291
+ mean = np.mean(data)
292
+ std = np.std(data)
293
+ if std == 0:
294
+ return 0.0
295
+
296
+ return np.mean(((data - mean) / std) ** 4) - 3
297
+
298
+ def _compute_entropy(self, data: np.ndarray) -> float:
299
+ """Compute entropy of data."""
300
+ if len(data) == 0:
301
+ return 0.0
302
+
303
+ # Create histogram
304
+ hist, _ = np.histogram(data, bins=256, range=(0, 256))
305
+ hist = hist / np.sum(hist) # Normalize
306
+
307
+ # Remove zero probabilities
308
+ hist = hist[hist > 0]
309
+
310
+ # Compute entropy
311
+ return -np.sum(hist * np.log2(hist))
312
+
313
    def create_spectral_visualization(self, spectral_stack: Dict[str, np.ndarray],
                                      pca_features: Dict[str, Any]) -> np.ndarray:
        """
        Create visualization of spectral features.

        Tries, in order:
          1. an RGB composite of (red, red_edge, green),
          2. an RGB composite of (nir, red, green),
          3. a grayscale replication of the first PCA component,
          4. a 100x100 black image (also returned on any error).

        Args:
            spectral_stack: Original spectral data
            pca_features: PCA features

        Returns:
            Visualization image (uint8, H x W x 3)
        """
        try:
            # Preferred visualization: RGB = (Red, Red-Edge, Green)
            if 'red' in spectral_stack and 'red_edge' in spectral_stack and 'green' in spectral_stack:
                red = spectral_stack['red'].squeeze()
                red_edge = spectral_stack['red_edge'].squeeze()
                green = spectral_stack['green'].squeeze()

                # Normalize each band
                red_norm = self._normalize_band(red)
                red_edge_norm = self._normalize_band(red_edge)
                green_norm = self._normalize_band(green)

                # Create composite (Red, Red-Edge, Green)
                rgb_composite = np.stack([red_norm, red_edge_norm, green_norm], axis=-1)

                return rgb_composite.astype(np.uint8)

            # Fallback visualization: RGB = (NIR, Red, Green)
            if 'red' in spectral_stack and 'green' in spectral_stack and 'nir' in spectral_stack:
                red = spectral_stack['red'].squeeze()
                green = spectral_stack['green'].squeeze()
                nir = spectral_stack['nir'].squeeze()

                # Normalize each band
                red_norm = self._normalize_band(red)
                green_norm = self._normalize_band(green)
                nir_norm = self._normalize_band(nir)

                # Channel order: NIR -> R, Red -> G, Green -> B.
                rgb_composite = np.stack([nir_norm, red_norm, green_norm], axis=-1)

                return rgb_composite.astype(np.uint8)

            # Fallback to first PCA component (replicated across 3 channels)
            elif 'pca_1' in pca_features:
                pca1 = pca_features['pca_1']
                pca1_norm = self._normalize_band(pca1)
                return np.stack([pca1_norm, pca1_norm, pca1_norm], axis=-1).astype(np.uint8)

            else:
                # Return empty image
                return np.zeros((100, 100, 3), dtype=np.uint8)

        except Exception as e:
            logger.error(f"Spectral visualization creation failed: {e}")
            return np.zeros((100, 100, 3), dtype=np.uint8)
370
+
371
+ def _normalize_band(self, band: np.ndarray) -> np.ndarray:
372
+ """Normalize band to 0-255 range."""
373
+ valid_data = band[~np.isnan(band)]
374
+ if len(valid_data) == 0:
375
+ return np.zeros_like(band, dtype=np.uint8)
376
+
377
+ m, M = np.min(valid_data), np.max(valid_data)
378
+ if M > m:
379
+ normalized = ((band - m) / (M - m) * 255).astype(np.uint8)
380
+ else:
381
+ normalized = np.zeros_like(band, dtype=np.uint8)
382
+
383
+ return normalized
sorghum_pipeline/features/texture.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Texture feature extraction for the Sorghum Pipeline.
3
+
4
+ This module handles extraction of texture features including:
5
+ - Local Binary Patterns (LBP)
6
+ - Histogram of Oriented Gradients (HOG)
7
+ - Lacunarity features
8
+ - Edge Histogram Descriptor (EHD)
9
+ """
10
+
11
+ import numpy as np
12
+ import cv2
13
+ import torch
14
+ import torch.nn.functional as F
15
+ from skimage.feature import local_binary_pattern, hog
16
+ from skimage import exposure
17
+ from scipy import ndimage, signal
18
+ from sklearn.decomposition import PCA
19
+ from typing import Dict, Tuple, Optional, Any
20
+ import logging
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class TextureExtractor:
26
+ """Extracts texture features from images."""
27
+
28
+ def __init__(self,
29
+ lbp_points: int = 8,
30
+ lbp_radius: int = 1,
31
+ hog_orientations: int = 9,
32
+ hog_pixels_per_cell: Tuple[int, int] = (8, 8),
33
+ hog_cells_per_block: Tuple[int, int] = (2, 2),
34
+ lacunarity_window: int = 15,
35
+ ehd_threshold: float = 0.3,
36
+ angle_resolution: int = 45):
37
+ """
38
+ Initialize texture extractor.
39
+
40
+ Args:
41
+ lbp_points: Number of points for LBP
42
+ lbp_radius: Radius for LBP
43
+ hog_orientations: Number of orientations for HOG
44
+ hog_pixels_per_cell: Pixels per cell for HOG
45
+ hog_cells_per_block: Cells per block for HOG
46
+ lacunarity_window: Window size for lacunarity
47
+ ehd_threshold: Threshold for EHD
48
+ angle_resolution: Angle resolution for EHD
49
+ """
50
+ self.lbp_points = lbp_points
51
+ self.lbp_radius = lbp_radius
52
+ self.hog_orientations = hog_orientations
53
+ self.hog_pixels_per_cell = hog_pixels_per_cell
54
+ self.hog_cells_per_block = hog_cells_per_block
55
+ self.lacunarity_window = lacunarity_window
56
+ self.ehd_threshold = ehd_threshold
57
+ self.angle_resolution = angle_resolution
58
+
59
+ def extract_lbp(self, gray_image: np.ndarray) -> np.ndarray:
60
+ """
61
+ Extract Local Binary Pattern features.
62
+
63
+ Args:
64
+ gray_image: Grayscale input image
65
+
66
+ Returns:
67
+ LBP feature map
68
+ """
69
+ try:
70
+ lbp = local_binary_pattern(
71
+ gray_image,
72
+ self.lbp_points,
73
+ self.lbp_radius,
74
+ method='uniform'
75
+ )
76
+ return self._convert_to_uint8(lbp)
77
+ except Exception as e:
78
+ logger.error(f"LBP extraction failed: {e}")
79
+ return np.zeros_like(gray_image, dtype=np.uint8)
80
+
81
+ def extract_hog(self, gray_image: np.ndarray) -> np.ndarray:
82
+ """
83
+ Extract Histogram of Oriented Gradients features.
84
+
85
+ Args:
86
+ gray_image: Grayscale input image
87
+
88
+ Returns:
89
+ HOG feature map
90
+ """
91
+ try:
92
+ _, vis = hog(
93
+ gray_image,
94
+ orientations=self.hog_orientations,
95
+ pixels_per_cell=self.hog_pixels_per_cell,
96
+ cells_per_block=self.hog_cells_per_block,
97
+ visualize=True,
98
+ feature_vector=True
99
+ )
100
+ return exposure.rescale_intensity(vis, out_range=(0, 255)).astype(np.uint8)
101
+ except Exception as e:
102
+ logger.error(f"HOG extraction failed: {e}")
103
+ return np.zeros_like(gray_image, dtype=np.uint8)
104
+
105
+ def compute_local_lacunarity(self, gray_image: np.ndarray, window_size: int) -> np.ndarray:
106
+ """
107
+ Compute local lacunarity.
108
+
109
+ Args:
110
+ gray_image: Grayscale input image
111
+ window_size: Size of the sliding window
112
+
113
+ Returns:
114
+ Local lacunarity map
115
+ """
116
+ try:
117
+ arr = gray_image.astype(np.float32)
118
+ m1 = ndimage.uniform_filter(arr, size=window_size)
119
+ m2 = ndimage.uniform_filter(arr * arr, size=window_size)
120
+ var = m2 - m1 * m1
121
+ eps = 1e-6
122
+ lac = var / (m1 * m1 + eps) + 1
123
+ lac[m1 <= eps] = 0
124
+ return lac
125
+ except Exception as e:
126
+ logger.error(f"Local lacunarity computation failed: {e}")
127
+ return np.zeros_like(gray_image, dtype=np.float32)
128
+
129
    def compute_lacunarity_features(self, gray_image: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Compute three types of lacunarity features.

        Args:
            gray_image: Grayscale input image

        Returns:
            Tuple of (lac1, lac2, lac3) lacunarity maps, each rescaled to
            uint8 (all zeros on failure):
              - lac1: single-window lacunarity at ``self.lacunarity_window``
              - lac2: mean over three window scales (half, base, double)
              - lac3: differential-box-counting lacunarity from the torch
                model, falling back to a copy of lac2 when unavailable
        """
        try:
            # L1: Single window lacunarity
            lac1 = self.compute_local_lacunarity(gray_image, self.lacunarity_window)

            # L2: Multi-scale lacunarity (smallest window clamped to >= 3)
            scales = [max(3, self.lacunarity_window//2), self.lacunarity_window, self.lacunarity_window*2]
            lac2 = np.mean([
                self.compute_local_lacunarity(gray_image, s) for s in scales
            ], axis=0)

            # L3: DBC Lacunarity (if available)
            try:
                from ..models.dbc_lacunarity import DBC_Lacunarity
                # Scale to [0, 1] and add batch/channel dims for the torch layer.
                x = torch.from_numpy(gray_image.astype(np.float32)/255.0)[None, None]
                layer = DBC_Lacunarity(window_size=self.lacunarity_window).eval()
                with torch.no_grad():
                    lac3 = layer(x).squeeze().cpu().numpy()
            except ImportError:
                logger.warning("DBC Lacunarity not available, using L2 as L3")
                lac3 = lac2.copy()

            return (
                self._convert_to_uint8(lac1),
                self._convert_to_uint8(lac2),
                self._convert_to_uint8(lac3)
            )
        except Exception as e:
            logger.error(f"Lacunarity features computation failed: {e}")
            empty = np.zeros_like(gray_image, dtype=np.uint8)
            return empty, empty, empty
169
+
170
+ def generate_ehd_masks(self, mask_size: int = 3) -> np.ndarray:
171
+ """
172
+ Generate masks for Edge Histogram Descriptor.
173
+
174
+ Args:
175
+ mask_size: Size of the mask
176
+
177
+ Returns:
178
+ Array of EHD masks
179
+ """
180
+ if mask_size < 3:
181
+ mask_size = 3
182
+ if mask_size % 2 == 0:
183
+ mask_size += 1
184
+
185
+ # Base gradient mask
186
+ Gy = np.outer([1, 0, -1], [1, 2, 1])
187
+
188
+ # Expand if needed
189
+ if mask_size > 3:
190
+ expd = np.outer([1, 2, 1], [1, 2, 1])
191
+ for _ in range((mask_size - 3) // 2):
192
+ Gy = signal.convolve2d(expd, Gy, mode='full')
193
+
194
+ # Generate masks for different angles
195
+ angles = np.arange(0, 360, self.angle_resolution)
196
+ masks = np.zeros((len(angles), mask_size, mask_size), dtype=np.float32)
197
+
198
+ for i, angle in enumerate(angles):
199
+ masks[i] = ndimage.rotate(Gy, angle, reshape=False, mode='nearest')
200
+
201
+ return masks
202
+
203
    def extract_ehd_features(self, gray_image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Extract Edge Histogram Descriptor features.

        Convolves the image with rotated edge kernels (dilation 7, so the
        output is spatially smaller than the input), labels each pixel with
        the index of its strongest edge direction — or an extra "no edge"
        index when the best response is below ``self.ehd_threshold`` — and
        average-pools a one-hot map per direction.

        Args:
            gray_image: Grayscale input image

        Returns:
            Tuple of (ehd_features, ehd_map):
              - ehd_features: (n_directions + 1, H', W') pooled responses
              - ehd_map: per-pixel argmax direction index (uint8)
        """
        try:
            # Generate masks
            masks = self.generate_ehd_masks()

            # Scale to [0, 1] and add batch/channel dims for conv2d.
            X = torch.from_numpy(gray_image.astype(np.float32)/255.0).unsqueeze(0).unsqueeze(0)
            masks_tensor = torch.tensor(masks).unsqueeze(1).float()

            # Convolve with masks
            edge_responses = F.conv2d(X, masks_tensor, dilation=7)

            # Strongest direction per pixel; weak responses get the extra
            # "no edge" label (== number of masks).
            values, indices = torch.max(edge_responses, dim=1)
            indices[values < self.ehd_threshold] = masks.shape[0]

            # Average-pool a one-hot occupancy map for each direction label.
            feat_vect = []
            for edge in range(masks.shape[0] + 1):
                pooled = F.avg_pool2d(
                    (indices == edge).unsqueeze(1).float(),
                    kernel_size=5, stride=1, padding=2
                )
                feat_vect.append(pooled.squeeze(1))

            ehd_features = torch.stack(feat_vect, dim=1).squeeze(0).cpu().numpy()
            ehd_map = np.argmax(ehd_features, axis=0).astype(np.uint8)

            return ehd_features, ehd_map

        except Exception as e:
            logger.error(f"EHD features extraction failed: {e}")
            # NOTE(review): these fallback shapes assume 8 directions (+1 "no
            # edge") and a 2-pixel border, which does not match the success
            # path (a 3x3 kernel at dilation 7 trims 14 pixels, and the
            # direction count depends on angle_resolution) — confirm that
            # downstream consumers tolerate the discrepancy.
            empty_features = np.zeros((9, gray_image.shape[0]-4, gray_image.shape[1]-4), dtype=np.float32)
            empty_map = np.zeros_like(gray_image, dtype=np.uint8)
            return empty_features, empty_map
247
+
248
+ def extract_all_texture_features(self, gray_image: np.ndarray) -> Dict[str, np.ndarray]:
249
+ """
250
+ Extract all texture features from a grayscale image.
251
+
252
+ Args:
253
+ gray_image: Grayscale input image
254
+
255
+ Returns:
256
+ Dictionary of texture features
257
+ """
258
+ features = {}
259
+
260
+ try:
261
+ # LBP
262
+ features['lbp'] = self.extract_lbp(gray_image)
263
+
264
+ # HOG
265
+ features['hog'] = self.extract_hog(gray_image)
266
+
267
+ # Lacunarity
268
+ lac1, lac2, lac3 = self.compute_lacunarity_features(gray_image)
269
+ features['lac1'] = lac1
270
+ features['lac2'] = lac2
271
+ features['lac3'] = lac3
272
+
273
+ # EHD
274
+ ehd_features, ehd_map = self.extract_ehd_features(gray_image)
275
+ features['ehd_features'] = ehd_features
276
+ features['ehd_map'] = ehd_map
277
+
278
+ logger.debug("All texture features extracted successfully")
279
+
280
+ except Exception as e:
281
+ logger.error(f"Texture feature extraction failed: {e}")
282
+ # Return empty features
283
+ features = {
284
+ 'lbp': np.zeros_like(gray_image, dtype=np.uint8),
285
+ 'hog': np.zeros_like(gray_image, dtype=np.uint8),
286
+ 'lac1': np.zeros_like(gray_image, dtype=np.uint8),
287
+ 'lac2': np.zeros_like(gray_image, dtype=np.uint8),
288
+ 'lac3': np.zeros_like(gray_image, dtype=np.uint8),
289
+ 'ehd_features': np.zeros((9, gray_image.shape[0]-4, gray_image.shape[1]-4), dtype=np.float32),
290
+ 'ehd_map': np.zeros_like(gray_image, dtype=np.uint8)
291
+ }
292
+
293
+ return features
294
+
295
+ def _convert_to_uint8(self, arr: np.ndarray) -> np.ndarray:
296
+ """Convert array to uint8 with proper normalization."""
297
+ arr = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)
298
+ if arr.ptp() > 0:
299
+ normalized = (arr - arr.min()) / (arr.ptp() + 1e-6) * 255
300
+ else:
301
+ normalized = np.zeros_like(arr)
302
+ return np.clip(normalized, 0, 255).astype(np.uint8)
303
+
304
    def compute_texture_statistics(self, features: Dict[str, np.ndarray],
                                   mask: Optional[np.ndarray] = None) -> Dict[str, Dict[str, float]]:
        """
        Compute statistics for texture features.

        For the stacked 'ehd_features' entry, statistics are computed per
        channel (resizing the mask with nearest-neighbor interpolation when
        its shape differs from the channel's).  All other entries are treated
        as single 2-D maps; the mask is applied only when its shape matches,
        and masked-out pixels (NaN) are excluded from the statistics.

        Args:
            features: Dictionary of texture features
            mask: Optional mask to apply (non-zero = keep)

        Returns:
            Dictionary of feature statistics (per-channel dicts for
            'ehd_features', flat stat dicts otherwise; zeros when a feature
            has no valid pixels)
        """
        stats = {}

        for feature_name, feature_data in features.items():
            if feature_name == 'ehd_features':
                # Special handling for EHD features
                if mask is not None:
                    # Apply mask to each channel
                    masked_features = []
                    for i in range(feature_data.shape[0]):
                        channel = feature_data[i]
                        if mask.shape != channel.shape:
                            # Resize mask to match channel (EHD output is
                            # smaller than the input image).
                            mask_resized = cv2.resize(mask, (channel.shape[1], channel.shape[0]),
                                                      interpolation=cv2.INTER_NEAREST)
                            masked_channel = np.where(mask_resized > 0, channel, np.nan)
                        else:
                            masked_channel = np.where(mask > 0, channel, np.nan)
                        masked_features.append(masked_channel)
                    feature_data = np.stack(masked_features, axis=0)
                else:
                    # No mask: use the channels as-is (deliberate no-op branch).
                    feature_data = feature_data

                # Compute statistics for each EHD channel
                channel_stats = {}
                for i in range(feature_data.shape[0]):
                    channel = feature_data[i]
                    valid_data = channel[~np.isnan(channel)]
                    if len(valid_data) > 0:
                        channel_stats[f'channel_{i}'] = {
                            'mean': float(np.mean(valid_data)),
                            'std': float(np.std(valid_data)),
                            'min': float(np.min(valid_data)),
                            'max': float(np.max(valid_data)),
                            'median': float(np.median(valid_data))
                        }
                stats[feature_name] = channel_stats
            else:
                # Regular 2D features; mask applied only when shapes agree.
                if mask is not None and mask.shape == feature_data.shape:
                    masked_data = np.where(mask > 0, feature_data, np.nan)
                else:
                    masked_data = feature_data

                valid_data = masked_data[~np.isnan(masked_data)]
                if len(valid_data) > 0:
                    stats[feature_name] = {
                        'mean': float(np.mean(valid_data)),
                        'std': float(np.std(valid_data)),
                        'min': float(np.min(valid_data)),
                        'max': float(np.max(valid_data)),
                        'median': float(np.median(valid_data))
                    }
                else:
                    # No valid pixels: report zeroed statistics.
                    stats[feature_name] = {
                        'mean': 0.0, 'std': 0.0, 'min': 0.0, 'max': 0.0, 'median': 0.0
                    }

        return stats
sorghum_pipeline/features/vegetation.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vegetation index extraction for the Sorghum Pipeline.
3
+
4
+ This module handles extraction of various vegetation indices
5
+ from multispectral data.
6
+ """
7
+
8
+ import numpy as np
9
+ import cv2
10
+ from typing import Dict, Tuple, Optional, Any
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class VegetationIndexExtractor:
17
+ """Extracts vegetation indices from spectral data."""
18
+
19
+ def __init__(self, epsilon: float = 1e-10, soil_factor: float = 0.16):
20
+ """
21
+ Initialize vegetation index extractor.
22
+
23
+ Args:
24
+ epsilon: Small value to avoid division by zero
25
+ soil_factor: Soil factor for certain indices
26
+ """
27
+ # Coerce to float in case config passed strings like "1e-10"
28
+ try:
29
+ self.epsilon = float(epsilon)
30
+ except Exception:
31
+ self.epsilon = 1e-10
32
+ try:
33
+ self.soil_factor = float(soil_factor)
34
+ except Exception:
35
+ self.soil_factor = 0.16
36
+
37
+ # Define vegetation index formulas
38
+ self.index_formulas = {
39
+ "NDVI": lambda nir, red: (nir - red) / (nir + red + self.epsilon),
40
+ "GNDVI": lambda nir, green: (nir - green) / (nir + green + self.epsilon),
41
+ "NDRE": lambda nir, red_edge: (nir - red_edge) / (nir + red_edge + self.epsilon),
42
+ "GRNDVI": lambda nir, green, red: (nir - (green + red)) / (nir + (green + red) + self.epsilon),
43
+ "TNDVI": lambda nir, red: np.sqrt(np.clip(((nir - red) / (nir + red + self.epsilon)) + 0.5, 0, None)),
44
+ "MGRVI": lambda green, red: (green**2 - red**2) / (green**2 + red**2 + self.epsilon),
45
+ "GRVI": lambda nir, green: nir / (green + self.epsilon),
46
+ "NGRDI": lambda green, red: (green - red) / (green + red + self.epsilon),
47
+ "MSAVI": lambda nir, red: 0.5 * (2.0 * nir + 1 - np.sqrt((2 * nir + 1)**2 - 8 * (nir - red))),
48
+ "OSAVI": lambda nir, red: (nir - red) / (nir + red + self.soil_factor + self.epsilon),
49
+ "TSAVI": lambda nir, red, s=0.33, a=0.5, X=1.5: (s * (nir - s * red - a)) / (a * nir + red - a * s + X * (1 + s**2) + self.epsilon),
50
+ "GSAVI": lambda nir, green, l=0.5: (1 + l) * (nir - green) / (nir + green + l + self.epsilon),
51
+ # Requested additions and aliases
52
+ "GOSAVI": lambda nir, green: (nir - green) / (nir + green + 0.16 + self.epsilon),
53
+ "GDVI": lambda nir, green: nir - green,
54
+ "NDWI": lambda green, nir: (green - nir) / (green + nir + self.epsilon),
55
+ "DSWI4": lambda green, red: green / (red + self.epsilon),
56
+ "CIRE": lambda nir, red_edge: (nir / (red_edge + self.epsilon)) - 1.0,
57
+ "LCI": lambda nir, red_edge: (nir - red_edge) / (nir + red_edge + self.epsilon),
58
+ "CIgreen": lambda nir, green: (nir / (green + self.epsilon)) - 1,
59
+ "MCARI": lambda red_edge, red, green: ((red_edge - red) - 0.2 * (red_edge - green)) * (red_edge / (red + self.epsilon)),
60
+ "MCARI1": lambda nir, red, green: 1.2 * (2.5 * (nir - red) - 1.3 * (nir - green)),
61
+ "MCARI2": lambda nir, red, green: (1.5 * (2.5 * (nir - red) - 1.3 * (nir - green))) / np.sqrt((2 * nir + 1)**2 - (6 * nir - 5 * np.sqrt(red + self.epsilon))),
62
+ # MTVI variants per request
63
+ "MTVI1": lambda nir, red, green: 1.2 * (1.2 * (nir - green) - 2.5 * (red - green)),
64
+ "MTVI2": lambda nir, red, green: (1.5 * (1.2 * (nir - green) - 2.5 * (red - green))) / np.sqrt((2 * nir + 1)**2 - (6 * nir - 5 * np.sqrt(red + self.epsilon)) - 0.5 + self.epsilon),
65
+ "CVI": lambda nir, red, green: (nir * red) / (green**2 + self.epsilon),
66
+ "ARI": lambda green, red_edge: (1.0 / (green + self.epsilon)) - (1.0 / (red_edge + self.epsilon)),
67
+ "ARI2": lambda nir, green, red_edge: nir * (1.0 / (green + self.epsilon)) - nir * (1.0 / (red_edge + self.epsilon)),
68
+ "DVI": lambda nir, red: nir - red,
69
+ "WDVI": lambda nir, red, a=0.5: nir - a * red,
70
+ "SR": lambda nir, red: nir / (red + self.epsilon),
71
+ "MSR": lambda nir, red: (nir / (red + self.epsilon) - 1) / np.sqrt(nir / (red + self.epsilon) + 1),
72
+ "PVI": lambda nir, red, a=0.5, b=0.3: (nir - a * red - b) / (np.sqrt(1 + a**2) + self.epsilon),
73
+ "GEMI": lambda nir, red: ((2 * (nir**2 - red**2) + 1.5 * nir + 0.5 * red) / (nir + red + 0.5 + self.epsilon)) * (1 - 0.25 * ((2 * (nir**2 - red**2) + 1.5 * nir + 0.5 * red) / (nir + red + 0.5 + self.epsilon))) - ((red - 0.125) / (1 - red + self.epsilon)),
74
+ "ExR": lambda red, green: 1.3 * red - green,
75
+ "RI": lambda red, green: (red - green) / (red + green + self.epsilon),
76
+ "RRI1": lambda nir, red_edge: nir / (red_edge + self.epsilon),
77
+ "RRI2": lambda red_edge, red: red_edge / (red + self.epsilon),
78
+ "RRI": lambda nir, red_edge: nir / (red_edge + self.epsilon),
79
+ "AVI": lambda nir, red: np.cbrt(nir * (1.0 - red) * (nir - red + self.epsilon)),
80
+ "SIPI2": lambda nir, green, red: (nir - green) / (nir - red + self.epsilon),
81
+ "TCARI": lambda red_edge, red, green: 3 * ((red_edge - red) - 0.2 * (red_edge - green) * (red_edge / (red + self.epsilon))),
82
+ "TCARIOSAVI": lambda red_edge, red, green, nir: (3 * (red_edge - red) - 0.2 * (red_edge - green) * (red_edge / (red + self.epsilon))) / (1 + 0.16 * ((nir - red) / (nir + red + 0.16 + self.epsilon))),
83
+ "CCCI": lambda nir, red_edge, red: (((nir - red_edge) * (nir + red)) / ((nir + red_edge) * (nir - red) + self.epsilon)),
84
+ # Additional indices
85
+ "RDVI": lambda nir, red: (nir - red) / (np.sqrt(nir + red + self.epsilon)),
86
+ "NLI": lambda nir, red: ((nir**2) - red) / ((nir**2) + red + self.epsilon),
87
+ "BIXS": lambda green, red: np.sqrt(((green**2) + (red**2)) / 2.0),
88
+ "IPVI": lambda nir, red: nir / (nir + red + self.epsilon),
89
+ "EVI2": lambda nir, red: 2.4 * (nir - red) / (nir + red + 1.0 + self.epsilon)
90
+ }
91
+
92
+ # Define required bands for each index
93
+ self.index_bands = {
94
+ "NDVI": ["nir", "red"],
95
+ "GNDVI": ["nir", "green"],
96
+ "NDRE": ["nir", "red_edge"],
97
+ "GRNDVI": ["nir", "green", "red"],
98
+ "TNDVI": ["nir", "red"],
99
+ "MGRVI": ["green", "red"],
100
+ "GRVI": ["nir", "green"],
101
+ "NGRDI": ["green", "red"],
102
+ "MSAVI": ["nir", "red"],
103
+ "OSAVI": ["nir", "red"],
104
+ "TSAVI": ["nir", "red"],
105
+ "GSAVI": ["nir", "green"],
106
+ "GOSAVI": ["nir", "green"],
107
+ "GDVI": ["nir", "green"],
108
+ "NDWI": ["green", "nir"],
109
+ "DSWI4": ["green", "red"],
110
+ "CIRE": ["nir", "red_edge"],
111
+ "LCI": ["nir", "red_edge"],
112
+ "CIgreen": ["nir", "green"],
113
+ "MCARI": ["red_edge", "red", "green"],
114
+ "MCARI1": ["nir", "red", "green"],
115
+ "MCARI2": ["nir", "red", "green"],
116
+ "MTVI1": ["nir", "red", "green"],
117
+ "MTVI2": ["nir", "red", "green"],
118
+ "CVI": ["nir", "red", "green"],
119
+ "ARI": ["green", "red_edge"],
120
+ "ARI2": ["nir", "green", "red_edge"],
121
+ "DVI": ["nir", "red"],
122
+ "WDVI": ["nir", "red"],
123
+ "SR": ["nir", "red"],
124
+ "MSR": ["nir", "red"],
125
+ "PVI": ["nir", "red"],
126
+ "GEMI": ["nir", "red"],
127
+ "ExR": ["red", "green"],
128
+ "RI": ["red", "green"],
129
+ "RRI1": ["nir", "red_edge"],
130
+ "RRI2": ["red_edge", "red"],
131
+ "RRI": ["nir", "red_edge"],
132
+ "AVI": ["nir", "red"],
133
+ "SIPI2": ["nir", "green", "red"],
134
+ "TCARI": ["red_edge", "red", "green"],
135
+ "TCARIOSAVI": ["red_edge", "red", "green", "nir"],
136
+ "CCCI": ["nir", "red_edge", "red"],
137
+ "RDVI": ["nir", "red"],
138
+ "NLI": ["nir", "red"],
139
+ "BIXS": ["green", "red"],
140
+ "IPVI": ["nir", "red"],
141
+ "EVI2": ["nir", "red"]
142
+ }
143
+
144
+ def compute_vegetation_indices(self, spectral_stack: Dict[str, np.ndarray],
145
+ mask: np.ndarray) -> Dict[str, Dict[str, Any]]:
146
+ """
147
+ Compute vegetation indices from spectral data.
148
+
149
+ Args:
150
+ spectral_stack: Dictionary of spectral bands
151
+ mask: Binary mask for the plant
152
+
153
+ Returns:
154
+ Dictionary of vegetation indices with values and statistics
155
+ """
156
+ indices = {}
157
+
158
+ for index_name, formula in self.index_formulas.items():
159
+ try:
160
+ # Get required bands
161
+ required_bands = self.index_bands.get(index_name, [])
162
+
163
+ # Check if all required bands are available
164
+ if not all(band in spectral_stack for band in required_bands):
165
+ logger.warning(f"Skipping {index_name}: missing required bands")
166
+ continue
167
+
168
+ # Extract band data as float arrays
169
+ band_data = []
170
+ for band in required_bands:
171
+ arr = spectral_stack[band]
172
+ # Ensure numeric float np.ndarray
173
+ if isinstance(arr, np.ndarray):
174
+ arr = arr.squeeze(-1)
175
+ arr = np.asarray(arr, dtype=np.float64)
176
+ band_data.append(arr)
177
+
178
+ # Compute index (ensure float math)
179
+ index_values = formula(*band_data).astype(np.float64)
180
+
181
+ # Apply mask
182
+ if mask is not None:
183
+ binary_mask = (np.asarray(mask).astype(np.int32) > 0)
184
+ masked_values = np.where(binary_mask, index_values, np.nan)
185
+ else:
186
+ masked_values = index_values
187
+
188
+ # Compute statistics
189
+ valid_values = masked_values[~np.isnan(masked_values)]
190
+ if len(valid_values) > 0:
191
+ stats = {
192
+ 'mean': float(np.mean(valid_values)),
193
+ 'std': float(np.std(valid_values)),
194
+ 'min': float(np.min(valid_values)),
195
+ 'max': float(np.max(valid_values)),
196
+ 'median': float(np.median(valid_values)),
197
+ 'q25': float(np.percentile(valid_values, 25)),
198
+ 'q75': float(np.percentile(valid_values, 75)),
199
+ 'nan_fraction': float(np.isnan(masked_values).sum() / masked_values.size)
200
+ }
201
+ else:
202
+ stats = {
203
+ 'mean': 0.0, 'std': 0.0, 'min': 0.0, 'max': 0.0,
204
+ 'median': 0.0, 'q25': 0.0, 'q75': 0.0, 'nan_fraction': 1.0
205
+ }
206
+
207
+ indices[index_name] = {
208
+ 'values': masked_values,
209
+ 'statistics': stats
210
+ }
211
+
212
+ logger.debug(f"Computed {index_name}")
213
+
214
+ except Exception as e:
215
+ logger.error(f"Failed to compute {index_name}: {e}")
216
+ continue
217
+
218
+ return indices
219
+
220
+ def create_vegetation_index_image(self, index_values: np.ndarray,
221
+ colormap: str = 'RdYlGn',
222
+ vmin: Optional[float] = None,
223
+ vmax: Optional[float] = None) -> np.ndarray:
224
+ """
225
+ Create visualization image for vegetation index.
226
+
227
+ Args:
228
+ index_values: Vegetation index values
229
+ colormap: Matplotlib colormap name
230
+ vmin: Minimum value for normalization
231
+ vmax: Maximum value for normalization
232
+
233
+ Returns:
234
+ RGB image array
235
+ """
236
+ try:
237
+ import matplotlib.pyplot as plt
238
+ import matplotlib.cm as cm
239
+ from matplotlib.colors import Normalize
240
+
241
+ # Determine value range
242
+ valid_values = index_values[~np.isnan(index_values)]
243
+ if len(valid_values) == 0:
244
+ return np.zeros((*index_values.shape, 3), dtype=np.uint8)
245
+
246
+ if vmin is None:
247
+ vmin = np.min(valid_values)
248
+ if vmax is None:
249
+ vmax = np.max(valid_values)
250
+
251
+ # Normalize values
252
+ norm = Normalize(vmin=vmin, vmax=vmax)
253
+ cmap = cm.get_cmap(colormap)
254
+
255
+ # Apply colormap
256
+ rgba_img = cmap(norm(index_values))
257
+ rgba_img[np.isnan(index_values)] = [1, 1, 1, 1] # White for NaN
258
+
259
+ # Convert to RGB uint8
260
+ rgb_img = (rgba_img[:, :, :3] * 255).astype(np.uint8)
261
+
262
+ return rgb_img
263
+
264
+ except Exception as e:
265
+ logger.error(f"Failed to create vegetation index image: {e}")
266
+ return np.zeros((*index_values.shape, 3), dtype=np.uint8)
267
+
268
+ def get_available_indices(self) -> list:
269
+ """Get list of available vegetation indices."""
270
+ return list(self.index_formulas.keys())
271
+
272
+ def get_index_requirements(self, index_name: str) -> list:
273
+ """
274
+ Get required bands for a specific index.
275
+
276
+ Args:
277
+ index_name: Name of the vegetation index
278
+
279
+ Returns:
280
+ List of required band names
281
+ """
282
+ return self.index_bands.get(index_name, [])
283
+
284
+ def validate_spectral_data(self, spectral_stack: Dict[str, np.ndarray]) -> bool:
285
+ """
286
+ Validate spectral data for vegetation index computation.
287
+
288
+ Args:
289
+ spectral_stack: Dictionary of spectral bands
290
+
291
+ Returns:
292
+ True if valid, False otherwise
293
+ """
294
+ if not spectral_stack:
295
+ return False
296
+
297
+ required_bands = ['nir', 'red', 'green', 'red_edge']
298
+ if not all(band in spectral_stack for band in required_bands):
299
+ logger.warning("Missing required spectral bands")
300
+ return False
301
+
302
+ # Check data shapes
303
+ shapes = [arr.shape for arr in spectral_stack.values()]
304
+ if not all(shape == shapes[0] for shape in shapes):
305
+ logger.warning("Inconsistent spectral band shapes")
306
+ return False
307
+
308
+ return True
sorghum_pipeline/models/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model definitions for the Sorghum Pipeline.
3
+
4
+ This package contains neural network models and other
5
+ computational models used in the pipeline.
6
+ """
7
+
8
+ from .dbc_lacunarity import DBC_Lacunarity
9
+
10
+ __all__ = ["DBC_Lacunarity"]
sorghum_pipeline/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (438 Bytes). View file
 
sorghum_pipeline/models/__pycache__/dbc_lacunarity.cpython-312.pyc ADDED
Binary file (4.14 kB). View file
 
sorghum_pipeline/models/dbc_lacunarity.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DBC Lacunarity model for texture analysis.
3
+
4
+ This module implements the Differential Box Counting (DBC) method
5
+ for computing lacunarity features.
6
+ """
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ from typing import Optional
11
+
12
+
13
class DBC_Lacunarity(nn.Module):
    """
    Differential Box Counting (DBC) lacunarity as a torch module.

    Slides a window over the image, counts "boxes" spanned by the local
    intensity range, and normalizes the counts into a per-window
    lacunarity map — a texture descriptor used on plant images.
    """

    def __init__(self, model_name: str = 'Net', window_size: int = 3, eps: float = 1e-6):
        """
        Initialize DBC Lacunarity model.

        Args:
            model_name: Name of the model
            window_size: Size of the sliding window
            eps: Small value to avoid division by zero
        """
        super().__init__()
        self.window_size = window_size
        self.normalize = nn.Tanh()
        self.num_output_channels = 3
        self.eps = eps
        self.r = 1  # box height used by the differential box count
        self.model_name = model_name
        self.max_pool = nn.MaxPool2d(kernel_size=self.window_size, stride=1)

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the DBC Lacunarity model.

        Args:
            image: Input image tensor [B, C, H, W]

        Returns:
            Lacunarity features tensor (one value per sliding window).
        """
        # Squash arbitrary-range input through tanh into [0, 255].
        scaled = ((self.normalize(image) + 1) / 2) * 255

        # Per-window max, and min via the negated-max-pool trick.
        box_max = self.max_pool(scaled)
        box_min = -self.max_pool(-scaled)

        # Differential box count per window, then normalized mass.
        nr = torch.ceil(box_max / (self.r + self.eps)) - torch.ceil(box_min / (self.r + self.eps)) - 1
        Mr = torch.sum(nr)
        Q_mr = nr / (self.window_size - self.r + 1)
        # NOTE(review): Mr is a global sum over the whole batch/tensor, not a
        # per-window quantity — presumably intended; verify against the
        # reference DBC lacunarity formulation.
        return (Mr ** 2) * Q_mr / (Mr * Q_mr + self.eps) ** 2

    def compute_lacunarity(self, image: torch.Tensor) -> torch.Tensor:
        """
        Compute lacunarity for a single image without tracking gradients.

        Args:
            image: Input image tensor [1, 1, H, W]

        Returns:
            Lacunarity tensor
        """
        with torch.no_grad():
            return self.forward(image)

    def get_model_info(self) -> dict:
        """
        Get model information.

        Returns:
            Dictionary containing model parameters
        """
        return {
            'model_name': self.model_name,
            'window_size': self.window_size,
            'eps': self.eps,
            'r': self.r,
            'num_output_channels': self.num_output_channels
        }
sorghum_pipeline/output/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Output management modules for the Sorghum Pipeline.
3
+
4
+ This package contains output functionality including:
5
+ - Result saving
6
+ - Visualization generation
7
+ - Report creation
8
+ - Data export
9
+ """
10
+
11
+ from .manager import OutputManager
12
+
13
+ __all__ = ["OutputManager"]
sorghum_pipeline/output/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (470 Bytes). View file
 
sorghum_pipeline/output/__pycache__/manager.cpython-312.pyc ADDED
Binary file (40.9 kB). View file
 
sorghum_pipeline/output/manager.py ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Output manager for the Sorghum Pipeline.
3
+
4
+ This module handles saving results, generating visualizations,
5
+ and creating reports.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import numpy as np
11
+ import cv2
12
+
13
+ # Use a non-GUI backend to avoid segmentation faults in headless runs
14
+ try:
15
+ import matplotlib
16
+ if os.environ.get('MPLBACKEND') is None:
17
+ matplotlib.use('Agg')
18
+ import matplotlib.pyplot as plt
19
+ import matplotlib.cm as cm
20
+ from matplotlib.colors import Normalize
21
+ except Exception:
22
+ # Fallback safe imports (should not happen normally)
23
+ import matplotlib.pyplot as plt
24
+ import matplotlib.cm as cm
25
+ from matplotlib.colors import Normalize
26
+ from mpl_toolkits.axes_grid1 import make_axes_locatable
27
+ from pathlib import Path
28
+ from typing import Dict, Any, Optional, List, Tuple
29
+ from concurrent.futures import ThreadPoolExecutor, as_completed
30
+ import pandas as pd
31
+ import logging
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ class OutputManager:
37
+ """Manages output generation and saving."""
38
+
39
+ def __init__(self, output_folder: str, settings: Any):
40
+ """
41
+ Initialize output manager.
42
+
43
+ Args:
44
+ output_folder: Base output folder
45
+ settings: Output settings from config
46
+ """
47
+ self.output_folder = Path(output_folder)
48
+ self.settings = settings
49
+ # Fast mode and parallel save controls
50
+ try:
51
+ self.fast_mode: bool = bool(int(os.environ.get('FAST_OUTPUT', '0'))) or bool(getattr(settings, 'fast_mode', False))
52
+ except Exception:
53
+ self.fast_mode = False
54
+ try:
55
+ self.max_workers: int = int(os.environ.get('FAST_SAVE_WORKERS', '4'))
56
+ except Exception:
57
+ self.max_workers = 4
58
+ try:
59
+ self.png_compression: int = int(os.environ.get('PNG_COMPRESSION', '1')) # 0-9; 1 is fast
60
+ except Exception:
61
+ self.png_compression = 1
62
+
63
+ # Reduce thread usage to lower risk of native library segfaults
64
+ try:
65
+ import os as _os
66
+ _os.environ.setdefault('OMP_NUM_THREADS', '1')
67
+ _os.environ.setdefault('OPENBLAS_NUM_THREADS', '1')
68
+ _os.environ.setdefault('MKL_NUM_THREADS', '1')
69
+ _os.environ.setdefault('NUMEXPR_NUM_THREADS', '1')
70
+ except Exception:
71
+ pass
72
+ try:
73
+ cv2.setNumThreads(1)
74
+ except Exception:
75
+ pass
76
+
77
+ # Create base directories
78
+ self.output_folder.mkdir(parents=True, exist_ok=True)
79
+
80
+ def _imwrite_fast(self, dest: Path, img: np.ndarray) -> None:
81
+ try:
82
+ cv2.imwrite(str(dest), img, [cv2.IMWRITE_PNG_COMPRESSION, int(self.png_compression)])
83
+ except Exception:
84
+ cv2.imwrite(str(dest), img)
85
+
86
+ def create_output_directories(self) -> None:
87
+ """Ensure base output directory exists.
88
+
89
+ Note: Do NOT create subdirectories at the root (e.g., 'analysis').
90
+ Subdirectories are created within each plant's directory only.
91
+ """
92
+ self.output_folder.mkdir(parents=True, exist_ok=True)
93
+
94
+ def save_plant_results(self, plant_key: str, plant_data: Dict[str, Any]) -> None:
95
+ """
96
+ Save all results for a single plant.
97
+
98
+ Args:
99
+ plant_key: Plant identifier (e.g., "2025_02_05_plant1_frame8")
100
+ plant_data: Plant data dictionary
101
+ """
102
+ try:
103
+ # Parse plant key
104
+ parts = plant_key.split('_')
105
+ date_key = "_".join(parts[:3])
106
+ plant_name = parts[3]
107
+ frame_key = parts[4] if len(parts) > 4 else "frame0"
108
+
109
+ # Create plant-specific directory
110
+ plant_dir = self.output_folder / date_key / plant_name
111
+ plant_dir.mkdir(parents=True, exist_ok=True)
112
+
113
+ # Save segmentation results
114
+ self._save_segmentation_results(plant_dir, plant_name, plant_data)
115
+
116
+ # Save texture features
117
+ self._save_texture_features(plant_dir, plant_data)
118
+
119
+ # Save vegetation indices
120
+ self._save_vegetation_indices(plant_dir, plant_data)
121
+
122
+ # Save morphology features
123
+ self._save_morphology_features(plant_dir, plant_data)
124
+
125
+ # Save analysis plots
126
+ self._save_analysis_plots(plant_dir, plant_data)
127
+
128
+ # Save metadata
129
+ self._save_metadata(plant_dir, plant_key, plant_data)
130
+
131
+ logger.debug(f"Results saved for {plant_key}")
132
+
133
+ except Exception as e:
134
+ logger.error(f"Failed to save results for {plant_key}: {e}")
135
+
136
    def _save_segmentation_results(self, plant_dir: Path, plant_name: str, plant_data: Dict[str, Any]) -> None:
        """Save segmentation results (original, masks, overlay, maskouts) as PNGs.

        Writes into <plant_dir>/<segmentation_dir>. No-op when image saving
        is disabled in settings. Individual writes may run in a thread pool
        (cv2.imwrite only — no matplotlib in this method).
        """
        if not self.settings.save_images:
            return

        seg_dir = plant_dir / self.settings.segmentation_dir
        seg_dir.mkdir(exist_ok=True)

        try:
            # (destination path, image) pairs collected first, written last.
            tasks: List[Tuple[Path, np.ndarray]] = []
            # Choose which base image to present in original/overlay
            use_feature_image = False
            try:
                # Allow env override, and special-case plants 13-16 per user requirement
                use_feature_image = bool(int(os.environ.get('OUTPUT_USE_FEATURE_IMAGE', '0'))) or plant_name in { 'plant13','plant14','plant15','plant16' }
            except Exception:
                # Malformed env value: keep only the plant-name special case.
                use_feature_image = plant_name in { 'plant13','plant14','plant15','plant16' }
            if use_feature_image:
                # Prefer the feature composite, falling back to the segmentation one.
                base_image = plant_data.get('composite', plant_data.get('segmentation_composite'))
            else:
                base_image = plant_data.get('segmentation_composite', plant_data.get('composite'))
            if base_image is not None:
                tasks.append((seg_dir / 'original.png', base_image))
            if 'mask' in plant_data:
                tasks.append((seg_dir / 'mask.png', plant_data['mask']))
            if 'mask3' in plant_data and isinstance(plant_data['mask3'], np.ndarray):
                tasks.append((seg_dir / 'mask3.png', plant_data['mask3']))
            # Save the BRIA-generated mask (if present before overrides) as mask2.png
            if 'original_mask' in plant_data and isinstance(plant_data['original_mask'], np.ndarray):
                tasks.append((seg_dir / 'mask2.png', plant_data['original_mask']))
            if base_image is not None and 'mask' in plant_data:
                # NOTE: despite the name, _create_overlay returns the strictly
                # masked image (background zeroed), not a colored blend.
                overlay = self._create_overlay(base_image, plant_data['mask'])
                tasks.append((seg_dir / 'overlay.png', overlay))
            if 'masked_composite' in plant_data:
                tasks.append((seg_dir / 'masked_composite.png', plant_data['masked_composite']))

            # Create white-background maskouts
            try:
                if base_image is not None and 'mask' in plant_data:
                    maskout_external = self._create_maskout_white_background(base_image, plant_data['mask'])
                    tasks.append((seg_dir / 'maskout_external.png', maskout_external))
                # BRIA-only maskout directly on original composite
                if base_image is not None and 'original_mask' in plant_data and isinstance(plant_data['original_mask'], np.ndarray):
                    maskout_bria = self._create_maskout_white_background(base_image, plant_data['original_mask'])
                    tasks.append((seg_dir / 'maskout_bria.png', maskout_bria))
                # mask3 maskout on original composite
                if base_image is not None and 'mask3' in plant_data and isinstance(plant_data['mask3'], np.ndarray):
                    maskout_mask3 = self._create_maskout_white_background(base_image, plant_data['mask3'])
                    tasks.append((seg_dir / 'maskout_mask3.png', maskout_mask3))
            except Exception as _e:
                # Maskouts are best-effort extras; the primary saves proceed.
                logger.debug(f"Failed to create double maskouts: {_e}")

            # Flush all queued writes, in parallel when configured.
            if self.max_workers > 1 and len(tasks) > 1:
                with ThreadPoolExecutor(max_workers=self.max_workers) as ex:
                    futures = [ex.submit(self._imwrite_fast, p, img) for p, img in tasks]
                    for _ in as_completed(futures):
                        pass
            else:
                for p, img in tasks:
                    self._imwrite_fast(p, img)
        except Exception as e:
            logger.error(f"Failed to save segmentation results: {e}")
198
+
199
    def _save_texture_features(self, plant_dir: Path, plant_data: Dict[str, Any]) -> None:
        """Save per-band texture feature maps as PNGs plus statistics JSON.

        Layout: <plant_dir>/<texture_dir>/<band>/<feature>.png. In fast mode
        features are saved as plain normalized grayscale; otherwise each map
        is rendered via matplotlib with a colorbar.
        """
        if not self.settings.save_images or 'texture_features' not in plant_data:
            return

        texture_dir = plant_dir / self.settings.texture_dir
        texture_dir.mkdir(exist_ok=True)

        def save_feature_png(feature_name: str, values: Any, dest: Path, cmap_name: str = 'viridis') -> None:
            # Render one feature map to disk; falls back to a plain
            # normalized grayscale write if the pretty path fails.
            try:
                arr = np.asarray(values)
                # Already an RGB image: write it directly (cv2 expects BGR).
                if arr.ndim == 3 and arr.shape[-1] == 3:
                    self._imwrite_fast(dest, cv2.cvtColor(arr.astype(np.uint8), cv2.COLOR_RGB2BGR))
                    return
                if self.fast_mode:
                    # Fast path: simple normalization, no matplotlib
                    normalized = self._normalize_to_uint8(np.nan_to_num(arr.astype(np.float64), nan=0.0))
                    self._imwrite_fast(dest, normalized)
                else:
                    arr = arr.astype(np.float64)
                    # Masked array so NaN/inf pixels render as background.
                    masked = np.ma.masked_invalid(arr)
                    fig, ax = plt.subplots(figsize=(5, 5))
                    ax.set_axis_off()
                    ax.set_facecolor('white')
                    im = ax.imshow(masked, cmap=cmap_name)
                    divider = make_axes_locatable(ax)
                    cax = divider.append_axes("right", size="2%", pad=0.02)
                    cbar = plt.colorbar(im, cax=cax, orientation='vertical')
                    cbar.set_label(feature_name, fontsize=7)
                    cbar.ax.tick_params(labelsize=6, width=0.5, length=2)
                    if hasattr(cbar, 'outline') and cbar.outline is not None:
                        cbar.outline.set_linewidth(0.5)
                    plt.tight_layout()
                    plt.savefig(dest, dpi=self.settings.plot_dpi, bbox_inches='tight')
                    plt.close(fig)
            except Exception as e:
                logger.error(f"Failed to save texture feature image for {feature_name}: {e}")
                try:
                    # Fallback: plain grayscale dump of whatever arr holds.
                    normalized = self._normalize_to_uint8(np.nan_to_num(arr, nan=0.0))
                    self._imwrite_fast(dest, normalized)
                except Exception:
                    pass

        try:
            texture_features = plant_data['texture_features']

            for band, band_data in texture_features.items():
                if 'features' not in band_data:
                    continue

                band_dir = texture_dir / band
                band_dir.mkdir(exist_ok=True)

                features = band_data['features']

                # Save individual feature maps (optionally in parallel)
                items: List[Tuple[str, np.ndarray, Path, str]] = []
                for feature_name, feature_map in features.items():
                    if feature_name == 'ehd_features':
                        # EHD is a stack: one PNG per channel.
                        for i in range(feature_map.shape[0]):
                            channel = feature_map[i]
                            if isinstance(channel, np.ndarray) and channel.size > 0:
                                items.append((f'ehd_channel_{i}', channel, band_dir / f'ehd_channel_{i}.png', 'magma'))
                    else:
                        if isinstance(feature_map, np.ndarray) and feature_map.size > 0:
                            # Colormap choice by feature family.
                            cmap_choice = 'gray' if feature_name in ('lbp', 'hog') else 'plasma' if feature_name.startswith('lac') else 'viridis'
                            items.append((feature_name, feature_map, band_dir / f'{feature_name}.png', cmap_choice))

                # NOTE(review): in non-fast mode save_feature_png uses
                # matplotlib, which is not thread-safe; _save_vegetation_indices
                # deliberately saves sequentially for that reason — confirm
                # whether this parallel path should be fast-mode-only.
                if self.max_workers > 1 and len(items) > 1:
                    with ThreadPoolExecutor(max_workers=self.max_workers) as ex:
                        futures = [ex.submit(save_feature_png, n, m, p, c) for (n, m, p, c) in items]
                        for _ in as_completed(futures):
                            pass
                else:
                    for (n, m, p, c) in items:
                        save_feature_png(n, m, p, c)

                # Create feature summary plot
                self._create_texture_summary_plot(band_dir, features, band)

                # Save texture statistics if available
                if 'statistics' in band_data and isinstance(band_data['statistics'], dict):
                    try:
                        with open(band_dir / 'texture_statistics.json', 'w') as f:
                            json.dump(band_data['statistics'], f, indent=2)
                    except Exception as e:
                        logger.error(f"Failed to save texture statistics for {band}: {e}")

        except Exception as e:
            logger.error(f"Failed to save texture features: {e}")
289
+
290
    def _save_vegetation_indices(self, plant_dir: Path, plant_data: Dict[str, Any]) -> None:
        """Save vegetation index maps as PNGs plus per-index and aggregate stats.

        Layout: <plant_dir>/<vegetation_dir>/<INDEX>.png (+ _stats.json).
        Fast mode writes plain normalized grayscale; otherwise each index is
        rendered with a per-index colormap and value range.
        """
        if not self.settings.save_images or 'vegetation_indices' not in plant_data:
            return

        veg_dir = plant_dir / self.settings.vegetation_dir
        veg_dir.mkdir(exist_ok=True)

        # Colormap and range settings per index
        # (cmap, vmin, vmax); None means "use the data's nanmin/nanmax".
        index_cmap_settings = {
            "NDVI": (cm.RdYlGn, -1, 1),
            "GNDVI": (cm.RdYlGn, -1, 1),
            "NDRE": (cm.RdYlGn, -1, 1),
            "GRNDVI": (cm.RdYlGn, -1, 1),
            "TNDVI": (cm.RdYlGn, -1, 1),
            "MGRVI": (cm.RdYlGn, -1, 1),
            "GRVI": (cm.RdYlGn, -1, 1),
            "NGRDI": (cm.RdYlGn, -1, 1),
            "MSAVI": (cm.YlGn, 0, 1),
            "OSAVI": (cm.YlGn, 0, 1),
            "TSAVI": (cm.YlGn, 0, 1),
            "GSAVI": (cm.YlGn, 0, 1),
            "NDWI": (cm.Blues, -1, 1),
            "DSWI4": (cm.Blues, -1, 1),
            "CIRE": (cm.viridis, 0, 10),
            "LCI": (cm.viridis, 0, 5),
            "CIgreen": (cm.viridis, 0, 5),
            "MCARI": (cm.viridis, 0, 1.5),
            "MCARI1": (cm.viridis, 0, 1.5),
            "MCARI2": (cm.viridis, 0, 1.5),
            "CVI": (cm.plasma, 0, 10),
            "TCARI": (cm.viridis, 0, 1),
            "TCARIOSAVI": (cm.viridis, 0, 1),
            "AVI": (cm.magma, 0, 1),
            "SIPI2": (cm.inferno, 0, 1),
            "ARI": (cm.magma, 0, 1),
            "ARI2": (cm.magma, 0, 1),
            "DVI": (cm.Greens, 0, None),
            "WDVI": (cm.Greens, 0, None),
            "SR": (cm.viridis, 0, 10),
            "MSR": (cm.viridis, 0, 10),
            "PVI": (cm.cividis, None, None),
            "GEMI": (cm.cividis, 0, 1),
            "ExR": (cm.Reds, -1, 1),
            "RI": (cm.Reds, 0, None),
            "RRI1": (cm.Reds, 0, 1)
        }

        def save_index_png(index_name: str, values: Any, dest: Path) -> None:
            # Render one index map to disk; plain-grayscale fallback on error.
            try:
                arr = values
                # Scalar index values cannot be rendered as an image.
                if not isinstance(arr, (list, tuple,)) and isinstance(arr, (float, int)):
                    return
                arr = np.asarray(arr, dtype=np.float64)
                if self.fast_mode:
                    normalized = self._normalize_to_uint8(np.nan_to_num(arr, nan=0.0))
                    self._imwrite_fast(dest, normalized)
                else:
                    # Unknown indices fall back to viridis over the data range.
                    cmap, vmin, vmax = index_cmap_settings.get(index_name, (cm.viridis, np.nanmin(arr), np.nanmax(arr)))
                    if vmin is None:
                        vmin = np.nanmin(arr)
                    if vmax is None:
                        vmax = np.nanmax(arr)
                    # Guard against all-NaN or constant maps.
                    if not np.isfinite(vmin) or not np.isfinite(vmax) or vmin == vmax:
                        vmin, vmax = 0.0, 1.0
                    masked = np.ma.masked_invalid(arr)
                    fig, ax = plt.subplots(figsize=(5, 5))
                    ax.set_axis_off()
                    ax.set_facecolor('white')
                    im = ax.imshow(masked, cmap=cmap, vmin=vmin, vmax=vmax)
                    divider = make_axes_locatable(ax)
                    cax = divider.append_axes("right", size="2%", pad=0.02)
                    cbar = plt.colorbar(im, cax=cax, orientation='vertical')
                    cbar.set_label(index_name, fontsize=7)
                    cbar.ax.tick_params(labelsize=6, width=0.5, length=2)
                    if hasattr(cbar, 'outline') and cbar.outline is not None:
                        cbar.outline.set_linewidth(0.5)
                    plt.tight_layout()
                    plt.savefig(dest, dpi=self.settings.plot_dpi, bbox_inches='tight')
                    plt.close(fig)
            except Exception as e:
                logger.error(f"Failed to save vegetation index image for {index_name}: {e}")
                try:
                    # Fallback simple normalization
                    normalized = self._normalize_to_uint8(np.nan_to_num(arr, nan=0.0))
                    self._imwrite_fast(dest, normalized)
                except Exception:
                    pass

        try:
            vegetation_indices = plant_data['vegetation_indices']

            # Collect work first: PNG renders and per-index stats dumps.
            items_png: List[Tuple[str, np.ndarray, Path]] = []
            items_stats: List[Tuple[Path, Dict[str, Any]]] = []
            for index_name, index_data in vegetation_indices.items():
                if isinstance(index_data, dict) and 'values' in index_data:
                    values = index_data['values']
                    if isinstance(values, np.ndarray) and values.size > 0:
                        items_png.append((index_name, values, veg_dir / f'{index_name}.png'))
                    stats = index_data.get('statistics')
                    if isinstance(stats, dict):
                        items_stats.append((veg_dir / f'{index_name}_stats.json', stats))

            # Save sequentially to avoid matplotlib thread-safety issues
            for (name, arr, dest) in items_png:
                save_index_png(name, arr, dest)
            for (path, stats) in items_stats:
                try:
                    with open(path, 'w') as f:
                        json.dump(stats, f, indent=2)
                except Exception as e:
                    logger.error(f"Failed to save stats for {path.name.split('.')[0]}: {e}")

            # Create vegetation index summary (skip in fast mode)
            if not self.fast_mode:
                self._create_vegetation_summary_plot(veg_dir, vegetation_indices)

            # Save aggregated vegetation statistics
            try:
                all_stats = {k: v.get('statistics', {}) for k, v in vegetation_indices.items() if isinstance(v, dict)}
                with open(veg_dir / 'vegetation_statistics.json', 'w') as f:
                    json.dump(all_stats, f, indent=2)
            except Exception as e:
                logger.error(f"Failed to save aggregated vegetation statistics: {e}")

        except Exception as e:
            logger.error(f"Failed to save vegetation indices: {e}")
417
+
418
+ def _save_morphology_features(self, plant_dir: Path, plant_data: Dict[str, Any]) -> None:
419
+ """Save morphological features."""
420
+ if not self.settings.save_images or 'morphology_features' not in plant_data:
421
+ return
422
+
423
+ morph_dir = plant_dir / self.settings.morphology_dir
424
+ morph_dir.mkdir(exist_ok=True)
425
+
426
+ try:
427
+ morphology_features = plant_data['morphology_features']
428
+
429
+ # Save morphological images
430
+ if 'images' in morphology_features:
431
+ for image_name, image_data in morphology_features['images'].items():
432
+ if isinstance(image_data, np.ndarray) and image_data.size > 0:
433
+ cv2.imwrite(str(morph_dir / f'{image_name}.png'), image_data)
434
+
435
+ # Save morphological data
436
+ if 'traits' in morphology_features:
437
+ traits = morphology_features['traits']
438
+ with open(morph_dir / 'traits.json', 'w') as f:
439
+ json.dump(traits, f, indent=2)
440
+
441
+ except Exception as e:
442
+ logger.error(f"Failed to save morphology features: {e}")
443
+
444
+ def _save_analysis_plots(self, plant_dir: Path, plant_data: Dict[str, Any]) -> None:
445
+ """Save analysis plots."""
446
+ if not self.settings.save_plots or self.fast_mode:
447
+ return
448
+
449
+ analysis_dir = plant_dir / self.settings.analysis_dir
450
+ analysis_dir.mkdir(exist_ok=True)
451
+
452
+ try:
453
+ # Create comprehensive analysis plot
454
+ self._create_comprehensive_analysis_plot(analysis_dir, plant_data)
455
+
456
+ except Exception as e:
457
+ logger.error(f"Failed to save analysis plots: {e}")
458
+
459
+ def _save_metadata(self, plant_dir: Path, plant_key: str, plant_data: Dict[str, Any]) -> None:
460
+ """Save metadata for the plant."""
461
+ if not self.settings.save_metadata:
462
+ return
463
+
464
+ try:
465
+ metadata = {
466
+ 'plant_key': plant_key,
467
+ 'timestamp': pd.Timestamp.now().isoformat(),
468
+ 'image_shape': plant_data.get('composite', np.array([])).shape if 'composite' in plant_data else None,
469
+ 'has_mask': 'mask' in plant_data and plant_data['mask'] is not None,
470
+ 'features_available': {
471
+ 'texture': 'texture_features' in plant_data,
472
+ 'vegetation': 'vegetation_indices' in plant_data,
473
+ 'morphology': 'morphology_features' in plant_data
474
+ }
475
+ }
476
+
477
+ with open(plant_dir / 'metadata.json', 'w') as f:
478
+ json.dump(metadata, f, indent=2)
479
+
480
+ except Exception as e:
481
+ logger.error(f"Failed to save metadata: {e}")
482
+
483
+ def _create_overlay(self, image: np.ndarray, mask: np.ndarray,
484
+ color: Tuple[int, int, int] = (0, 255, 0),
485
+ alpha: float = 0.5) -> np.ndarray:
486
+ """Return a strictly masked image: pixels where mask>0 keep original; others set to 0."""
487
+ if mask is None:
488
+ return image
489
+ # Resize mask to image size if needed
490
+ if mask.shape[:2] != image.shape[:2]:
491
+ try:
492
+ mask = cv2.resize(mask.astype(np.uint8), (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)
493
+ except Exception:
494
+ pass
495
+ binary = (mask.astype(np.int32) > 0).astype(np.uint8) * 255
496
+ return cv2.bitwise_and(image, image, mask=binary)
497
+
498
+ def _create_maskout_white_background(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
499
+ """Create maskout image with white background."""
500
+ # Create white background
501
+ white_background = np.full_like(image, 255, dtype=np.uint8)
502
+
503
+ # Apply mask to original image (keep only masked regions)
504
+ masked_image = image.copy()
505
+ masked_image[mask == 0] = 0 # Set non-masked regions to black
506
+
507
+ # Combine: white background + masked image
508
+ result = white_background.copy()
509
+ result[mask > 0] = masked_image[mask > 0]
510
+
511
+ return result
512
+
513
+ def _normalize_to_uint8(self, arr: np.ndarray) -> np.ndarray:
514
+ """Normalize array to uint8 range."""
515
+ if arr.size == 0:
516
+ return arr.astype(np.uint8)
517
+
518
+ arr = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)
519
+
520
+ if arr.ptp() > 0:
521
+ normalized = (arr - arr.min()) / (arr.ptp() + 1e-6) * 255
522
+ else:
523
+ normalized = np.zeros_like(arr)
524
+
525
+ return np.clip(normalized, 0, 255).astype(np.uint8)
526
+
527
+ def _create_texture_summary_plot(self, output_dir: Path, features: Dict[str, np.ndarray], band: str) -> None:
528
+ """Create texture feature summary plot."""
529
+ try:
530
+ # Get available features
531
+ available_features = [k for k, v in features.items()
532
+ if isinstance(v, np.ndarray) and v.size > 0 and k != 'ehd_features']
533
+
534
+ if not available_features:
535
+ return
536
+
537
+ # Create subplot
538
+ n_features = len(available_features)
539
+ cols = min(3, n_features)
540
+ rows = (n_features + cols - 1) // cols
541
+
542
+ fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 4*rows))
543
+ if n_features == 1:
544
+ axes = [axes]
545
+ elif rows == 1:
546
+ axes = axes.reshape(1, -1)
547
+
548
+ for i, feature_name in enumerate(available_features):
549
+ row, col = divmod(i, cols)
550
+ ax = axes[row, col] if rows > 1 else axes[col]
551
+
552
+ feature_map = features[feature_name]
553
+ ax.imshow(feature_map, cmap='viridis')
554
+ ax.set_title(f'{band.upper()} - {feature_name.upper()}')
555
+ ax.axis('off')
556
+
557
+ # Hide unused subplots
558
+ for i in range(n_features, rows * cols):
559
+ row, col = divmod(i, cols)
560
+ ax = axes[row, col] if rows > 1 else axes[col]
561
+ ax.axis('off')
562
+
563
+ plt.tight_layout()
564
+ plt.savefig(output_dir / f'{band}_texture_summary.png',
565
+ dpi=self.settings.plot_dpi, bbox_inches='tight')
566
+ plt.close()
567
+
568
+ except Exception as e:
569
+ logger.error(f"Failed to create texture summary plot: {e}")
570
+
571
+ def _create_vegetation_summary_plot(self, output_dir: Path, vegetation_indices: Dict[str, Any]) -> None:
572
+ """Create vegetation index summary plot."""
573
+ try:
574
+ # Get available indices
575
+ available_indices = [k for k, v in vegetation_indices.items()
576
+ if isinstance(v, dict) and 'values' in v and isinstance(v['values'], np.ndarray)]
577
+
578
+ if not available_indices:
579
+ return
580
+
581
+ # Create subplot
582
+ n_indices = len(available_indices)
583
+ cols = min(3, n_indices)
584
+ rows = (n_indices + cols - 1) // cols
585
+
586
+ fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 4*rows))
587
+ if n_indices == 1:
588
+ axes = [axes]
589
+ elif rows == 1:
590
+ axes = axes.reshape(1, -1)
591
+
592
+ for i, index_name in enumerate(available_indices):
593
+ row, col = divmod(i, cols)
594
+ ax = axes[row, col] if rows > 1 else axes[col]
595
+
596
+ values = vegetation_indices[index_name]['values']
597
+ im = ax.imshow(values, cmap='RdYlGn')
598
+ ax.set_title(f'{index_name}')
599
+ ax.axis('off')
600
+ divider = make_axes_locatable(ax)
601
+ cax = divider.append_axes("right", size="2%", pad=0.02)
602
+ cbar = plt.colorbar(im, cax=cax, orientation='vertical')
603
+ cbar.ax.tick_params(labelsize=6, width=0.5, length=2)
604
+ if hasattr(cbar, 'outline') and cbar.outline is not None:
605
+ cbar.outline.set_linewidth(0.5)
606
+
607
+ # Hide unused subplots
608
+ for i in range(n_indices, rows * cols):
609
+ row, col = divmod(i, cols)
610
+ ax = axes[row, col] if rows > 1 else axes[col]
611
+ ax.axis('off')
612
+
613
+ plt.tight_layout()
614
+ plt.savefig(output_dir / 'vegetation_indices_summary.png',
615
+ dpi=self.settings.plot_dpi, bbox_inches='tight')
616
+ plt.close()
617
+
618
+ except Exception as e:
619
+ logger.error(f"Failed to create vegetation summary plot: {e}")
620
+
621
    def _create_comprehensive_analysis_plot(self, output_dir: Path, plant_data: Dict[str, Any]) -> None:
        """Render a fixed 2x3 overview figure for one plant.

        Panels (filled only when the corresponding data is present in
        ``plant_data``): composite image, segmentation mask, masked overlay,
        LBP texture map (color band), NDVI map, and morphology skeleton.
        The figure is saved to ``<output_dir>/comprehensive_analysis.png``
        at a DPI capped at 100.  Errors are logged, not raised.

        Args:
            output_dir: Directory the PNG is written into.
            plant_data: Per-plant result dict; recognized keys are
                'composite', 'mask', 'texture_features', 'vegetation_indices'
                and 'morphology_features'.

        NOTE(review): panels whose data is missing keep default (empty but
        visible) axes; verify whether they should be hidden instead.
        """
        try:
            fig, axes = plt.subplots(2, 3, figsize=(15, 10))

            # Original image (composite is BGR on disk; convert for display).
            if 'composite' in plant_data:
                axes[0, 0].imshow(cv2.cvtColor(plant_data['composite'], cv2.COLOR_BGR2RGB))
                axes[0, 0].set_title('Original Composite')
                axes[0, 0].axis('off')

            # Mask
            if 'mask' in plant_data:
                axes[0, 1].imshow(plant_data['mask'], cmap='gray')
                axes[0, 1].set_title('Segmentation Mask')
                axes[0, 1].axis('off')

            # Overlay (strictly masked composite; see _create_overlay)
            if 'composite' in plant_data and 'mask' in plant_data:
                overlay = self._create_overlay(plant_data['composite'], plant_data['mask'])
                axes[0, 2].imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
                axes[0, 2].set_title('Overlay')
                axes[0, 2].axis('off')

            # Texture features (if available) — LBP map of the color band only
            if 'texture_features' in plant_data and 'color' in plant_data['texture_features']:
                color_features = plant_data['texture_features']['color'].get('features', {})
                if 'lbp' in color_features:
                    axes[1, 0].imshow(color_features['lbp'], cmap='viridis')
                    axes[1, 0].set_title('LBP Texture')
                    axes[1, 0].axis('off')

            # Vegetation indices (if available) — NDVI only
            if 'vegetation_indices' in plant_data:
                veg_indices = plant_data['vegetation_indices']
                if 'NDVI' in veg_indices and 'values' in veg_indices['NDVI']:
                    axes[1, 1].imshow(veg_indices['NDVI']['values'], cmap='RdYlGn')
                    axes[1, 1].set_title('NDVI')
                    axes[1, 1].axis('off')

            # Morphology (if available) — skeleton image only
            if 'morphology_features' in plant_data and 'images' in plant_data['morphology_features']:
                morph_images = plant_data['morphology_features']['images']
                if 'skeleton' in morph_images:
                    axes[1, 2].imshow(morph_images['skeleton'], cmap='gray')
                    axes[1, 2].set_title('Skeleton')
                    axes[1, 2].axis('off')

            plt.tight_layout()
            # DPI is capped at 100 here to keep the overview PNG small even
            # when settings.plot_dpi is higher.
            plt.savefig(output_dir / 'comprehensive_analysis.png',
                        dpi=min(getattr(self.settings, 'plot_dpi', 100), 100), bbox_inches='tight')
            plt.close()

        except Exception as e:
            logger.error(f"Failed to create comprehensive analysis plot: {e}")
676
+
677
+ def create_pipeline_summary(self, results: Dict[str, Any]) -> None:
678
+ """Create a summary of the entire pipeline run."""
679
+ try:
680
+ summary_file = self.output_folder / 'pipeline_summary.json'
681
+
682
+ with open(summary_file, 'w') as f:
683
+ json.dump(results['summary'], f, indent=2)
684
+
685
+ logger.info(f"Pipeline summary saved to {summary_file}")
686
+
687
+ except Exception as e:
688
+ logger.error(f"Failed to create pipeline summary: {e}")
sorghum_pipeline/pipeline.py ADDED
@@ -0,0 +1,1377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main pipeline class for the Sorghum Plant Phenotyping Pipeline.
3
+
4
+ This module orchestrates the entire pipeline from data loading
5
+ to feature extraction and result output.
6
+ """
7
+
8
+ import os
9
+ import subprocess
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Dict, Any, Optional, List, Set
13
+ import numpy as np
14
+ import cv2
15
+ import torch
16
+ from torchvision import transforms
17
+ from transformers import AutoModelForImageSegmentation
18
+ from sklearn.decomposition import PCA
19
+ try:
20
+ from tqdm import tqdm
21
+ except Exception:
22
+ tqdm = None
23
+
24
+ from .config import Config
25
+ from .data import DataLoader, ImagePreprocessor, MaskHandler
26
+ from .features import TextureExtractor, VegetationIndexExtractor, MorphologyExtractor
27
+ from .output import OutputManager
28
+ from .segmentation import SegmentationManager
29
+ # Make occlusion handling optional if the module is not present
30
+ try:
31
+ from .segmentation.occlusion_handler import OcclusionHandler # type: ignore
32
+ except Exception:
33
+ OcclusionHandler = None # type: ignore
34
+
35
+
36
+ class SorghumPipeline:
37
+ """
38
+ Main pipeline class for sorghum plant phenotyping.
39
+
40
+ This class orchestrates the entire pipeline from data loading
41
+ to feature extraction and result output.
42
+ """
43
+
44
    def __init__(self, config_path: Optional[str] = None, config: Optional[Config] = None, include_ignored: bool = False, enable_occlusion_handling: bool = False, enable_instance_integration: bool = False, strict_loader: bool = False, excluded_dates: Optional[List[str]] = None):
        """
        Initialize the pipeline.

        Args:
            config_path: Path to configuration file
            config: Configuration object (if not using file); takes
                precedence over config_path when both are given
            include_ignored: Whether to include ignored plants
            enable_occlusion_handling: Whether to enable SAM2Long occlusion handling
            enable_instance_integration: Whether to run instance-segmentation
                integration during run()
            strict_loader: Forwarded to DataLoader as its strict_loader flag
            excluded_dates: Date keys the DataLoader should skip (None -> [])

        Raises:
            ValueError: If neither config_path nor config is provided.
        """
        # Setup logging first: _setup_logging also binds the module-level
        # `logger` global that the rest of this class relies on.
        self._setup_logging()

        # Load configuration (explicit Config object wins over a path)
        if config is not None:
            self.config = config
        elif config_path is not None:
            self.config = Config(config_path)
        else:
            raise ValueError("Either config_path or config must be provided")

        # Validate configuration
        self.config.validate()

        # Store settings
        self.enable_occlusion_handling = enable_occlusion_handling
        self.enable_instance_integration = enable_instance_integration
        self.strict_loader = strict_loader
        self.excluded_dates = excluded_dates or []

        # Initialize components
        self._initialize_components(include_ignored)

        logger.info("Sorghum Pipeline initialized successfully")
78
+
79
    def _setup_logging(self):
        """Configure logging and bind the module-level ``logger`` global.

        Sends INFO-level records both to a stream handler and to the file
        'sorghum_pipeline.log' in the current working directory.  Note that
        ``logging.basicConfig`` has no effect if the root logger was already
        configured elsewhere in the process.
        """
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(),
                logging.FileHandler('sorghum_pipeline.log')
            ]
        )
        # NOTE(review): assigning a global from inside a method is unusual —
        # a module-level `logger = logging.getLogger(__name__)` would be the
        # conventional form — but the other methods of this module read the
        # `logger` global bound here, so keep the assignment.
        global logger
        logger = logging.getLogger(__name__)
91
+
92
    def _initialize_components(self, include_ignored: bool = False):
        """Construct all pipeline components from the validated config.

        Creates, in order: the data loader/preprocessor/mask handler, the
        texture/vegetation/morphology feature extractors, the segmentation
        manager, the optional occlusion handler, and the output manager.
        All parameters come from ``self.config``; flags captured in
        ``__init__`` (strict_loader, excluded_dates,
        enable_occlusion_handling) are forwarded where relevant.

        Args:
            include_ignored: Whether the DataLoader should include plants
                otherwise marked as ignored.
        """
        # Data components
        self.data_loader = DataLoader(
            input_folder=self.config.paths.input_folder,
            debug=True,  # always-on verbose loading; TODO confirm intentional
            include_ignored=include_ignored,
            strict_loader=self.strict_loader,
            excluded_dates=self.excluded_dates,
        )
        self.preprocessor = ImagePreprocessor(
            target_size=self.config.processing.target_size
        )
        self.mask_handler = MaskHandler(
            min_area=self.config.processing.min_component_area,
            kernel_size=self.config.processing.morphology_kernel_size
        )

        # Feature extractors
        self.texture_extractor = TextureExtractor(
            lbp_points=self.config.processing.lbp_points,
            lbp_radius=self.config.processing.lbp_radius,
            hog_orientations=self.config.processing.hog_orientations,
            hog_pixels_per_cell=self.config.processing.hog_pixels_per_cell,
            hog_cells_per_block=self.config.processing.hog_cells_per_block,
            lacunarity_window=self.config.processing.lacunarity_window,
            ehd_threshold=self.config.processing.ehd_threshold,
            angle_resolution=self.config.processing.angle_resolution
        )

        self.vegetation_extractor = VegetationIndexExtractor(
            epsilon=self.config.processing.epsilon,
            soil_factor=self.config.processing.soil_factor
        )

        self.morphology_extractor = MorphologyExtractor(
            pixel_to_cm=self.config.processing.pixel_to_cm,
            prune_sizes=self.config.processing.prune_sizes
        )

        # Segmentation
        self.segmentation_manager = SegmentationManager(
            model_name=self.config.model.model_name,
            device=self.config.get_device(),
            threshold=self.config.processing.segmentation_threshold,
            trust_remote_code=self.config.model.trust_remote_code,
            # Empty-string cache_dir is treated the same as unset (None).
            cache_dir=self.config.model.cache_dir if getattr(self.config.model, 'cache_dir', '') else None,
            local_files_only=getattr(self.config.model, 'local_files_only', False),
        )

        # Occlusion handling (optional): best-effort construction — a
        # failure here degrades to running without occlusion handling.
        self.occlusion_handler = None
        if self.enable_occlusion_handling and OcclusionHandler is not None:
            try:
                self.occlusion_handler = OcclusionHandler(
                    device=self.config.get_device(),
                    model="tiny",  # Can be made configurable
                    confidence_threshold=0.5,
                    iou_threshold=0.1
                )
                logger.info("Occlusion handler initialized successfully")
            except Exception as e:
                logger.warning(f"Failed to initialize occlusion handler: {e}")
                logger.warning("Continuing without occlusion handling")
                self.occlusion_handler = None
        elif self.enable_occlusion_handling and OcclusionHandler is None:
            # The optional import at module top failed; warn rather than fail.
            logger.warning("Occlusion handler module not found; continuing without occlusion handling")

        # Output manager
        self.output_manager = OutputManager(
            output_folder=self.config.paths.output_folder,
            settings=self.config.output
        )
165
+
166
    def _free_gpu_memory_before_instance(self) -> None:
        """Attempt to free GPU memory prior to running SAM2Long in a subprocess.

        Best-effort, never raises:
        - Moves the BRIA segmentation model to CPU if present
        - Deletes the model reference to release VRAM (then re-binds the
          attribute to None so later checks still find it)
        - Calls torch.cuda.empty_cache()

        Every step is wrapped in its own try/except so that a failure in one
        does not prevent the others from running.
        """
        try:
            import torch as _torch  # type: ignore
            # Move BRIA model to CPU and drop reference
            try:
                if getattr(self, 'segmentation_manager', None) is not None:
                    mdl = getattr(self.segmentation_manager, 'model', None)
                    if mdl is not None:
                        try:
                            mdl.to('cpu')
                        except Exception:
                            pass
                    try:
                        # Drop the attribute so the CUDA tensors it held can
                        # be garbage-collected.
                        delattr(self.segmentation_manager, 'model')
                    except Exception:
                        pass
                    # Ensure attribute exists but is None for future checks
                    try:
                        self.segmentation_manager.model = None  # type: ignore
                    except Exception:
                        pass
            except Exception:
                pass
            # Free CUDA cache
            try:
                if _torch.cuda.is_available():
                    _torch.cuda.empty_cache()
            except Exception:
                pass
            logger.info("Freed GPU memory before SAM2Long invocation (moved BRIA to CPU and emptied cache)")
        except Exception as e:
            logger.warning(f"Failed to free GPU memory before instance segmentation: {e}")
204
+
205
+ def run(self, load_all_frames: bool = False, segmentation_only: bool = False, filter_plants: Optional[List[str]] = None, filter_frames: Optional[List[str]] = None, run_instance_segmentation: bool = False, features_frame_only: Optional[int] = None, reuse_instance_results: bool = False, instance_mapping_path: Optional[str] = None, force_reprocess: bool = False, respect_instance_frame_rules_for_features: bool = False, substitute_feature_image_from_instance_src: bool = False) -> Dict[str, Any]:
206
+ """
207
+ Run the complete pipeline.
208
+
209
+ Args:
210
+ load_all_frames: Whether to load all frames or selected frames
211
+ segmentation_only: If True, run segmentation only and skip feature extraction
212
+
213
+ Returns:
214
+ Dictionary containing all results
215
+ """
216
+ logger.info("Starting Sorghum Pipeline...")
217
+
218
+ try:
219
+ import time
220
+ total_start = time.perf_counter()
221
+ # Step 1: Load data
222
+ logger.info("Step 1/6: Loading data...")
223
+ # In reuse mode we need all frames to select the mapped frame per plant
224
+ if reuse_instance_results:
225
+ plants = self.data_loader.load_all_frames()
226
+ else:
227
+ # If specific frames are requested, we must load all frames to filter correctly
228
+ if load_all_frames or (filter_frames is not None and len(filter_frames) > 0):
229
+ plants = self.data_loader.load_all_frames()
230
+ else:
231
+ plants = self.data_loader.load_selected_frames()
232
+
233
+ # Optional filter by specific plant names (e.g., ["plant1"])
234
+ if filter_plants:
235
+ allowed = set(filter_plants)
236
+ plants = {
237
+ key: pdata for key, pdata in plants.items()
238
+ if len(key.split('_')) > 3 and key.split('_')[3] in allowed
239
+ }
240
+
241
+ # Optional filter by specific frame numbers (e.g., ["9"] or ["frame9"])
242
+ if filter_frames:
243
+ # Normalize to 'frameX' tokens
244
+ wanted = set(
245
+ [f if str(f).startswith('frame') else f"frame{str(f)}" for f in filter_frames]
246
+ )
247
+ plants = {
248
+ key: pdata for key, pdata in plants.items()
249
+ if key.split('_')[-1] in wanted
250
+ }
251
+
252
+ if not plants:
253
+ raise ValueError("No plant data loaded")
254
+
255
+ logger.info(f"Loaded {len(plants)} plants")
256
+
257
+ # If reusing instance results with mapping, restrict to exactly the mapped frame per plant (default frame8)
258
+ if reuse_instance_results:
259
+ try:
260
+ import json as _json
261
+ if instance_mapping_path is None:
262
+ raise ValueError("instance_mapping_path is required in reuse mode")
263
+ _map = _json.load(open(instance_mapping_path, 'r'))
264
+ # Normalize mapping plant keys and compute target frame (default 8)
265
+ target_frame_by_plant = {}
266
+ for pk, pv in _map.items():
267
+ k_norm = pk if str(pk).startswith('plant') else f"plant{int(pk)}" if str(pk).isdigit() else str(pk)
268
+ try:
269
+ target_frame_by_plant[k_norm] = int(pv.get('frame', 8))
270
+ except Exception:
271
+ target_frame_by_plant[k_norm] = 8
272
+ before = len(plants)
273
+ plants = {
274
+ key: pdata for key, pdata in plants.items()
275
+ if (len(key.split('_')) > 3 and key.split('_')[3] in target_frame_by_plant
276
+ and key.split('_')[-1] == f"frame{target_frame_by_plant[key.split('_')[3]]}")
277
+ }
278
+ logger.info(f"Restricted loaded data by mapping frames: {before} -> {len(plants)} items")
279
+ except Exception as e:
280
+ logger.warning(f"Failed to restrict loaded data by mapping frames: {e}")
281
+
282
+ # Skip plants that already have saved results (unless force_reprocess)
283
+ if not force_reprocess:
284
+ try:
285
+ before = len(plants)
286
+ filtered = {}
287
+ for key, pdata in plants.items():
288
+ parts = key.split('_')
289
+ if len(parts) < 5:
290
+ filtered[key] = pdata
291
+ continue
292
+ date_key = "_".join(parts[:3])
293
+ plant_name = parts[3]
294
+ plant_dir = Path(self.config.paths.output_folder) / date_key / plant_name
295
+ meta_ok = (plant_dir / 'metadata.json').exists()
296
+ seg_mask_ok = (plant_dir / self.config.output.segmentation_dir / 'mask.png').exists()
297
+ if meta_ok or seg_mask_ok:
298
+ continue
299
+ filtered[key] = pdata
300
+ plants = filtered
301
+ logger.info(f"Skip-existing filter: {before} -> {len(plants)} items to process")
302
+ except Exception as e:
303
+ logger.warning(f"Skip-existing filter failed: {e}")
304
+
305
+ # Pre-segmentation borrowing: use plant12 images for plant13 from the start
306
+ try:
307
+ rewired = 0
308
+ borrow_map: Dict[str, str] = {
309
+ 'plant13': 'plant12',
310
+ 'plant14': 'plant13',
311
+ 'plant15': 'plant14',
312
+ 'plant16': 'plant15',
313
+ }
314
+ for _k in list(plants.keys()):
315
+ _parts = _k.split('_')
316
+ # Expect keys like YYYY_MM_DD_plantX_frameY
317
+ if len(_parts) < 5:
318
+ continue
319
+ _date_key = "_".join(_parts[:3])
320
+ _plant_name = _parts[3]
321
+ _frame_token = _parts[4]
322
+ # Do NOT borrow on 2025_05_08
323
+ if _date_key == '2025_05_08':
324
+ continue
325
+ if _plant_name not in borrow_map:
326
+ continue
327
+ _src_plant = borrow_map[_plant_name]
328
+ _src_key = f"{_date_key}_{_src_plant}_{_frame_token}"
329
+ _src = plants.get(_src_key)
330
+ if not _src:
331
+ # Fallback: load raw image for source plant directly from disk
332
+ try:
333
+ from PIL import Image as _Image
334
+ _date_folder = _date_key.replace('_', '-')
335
+ _frame_num = int(_frame_token.replace('frame', ''))
336
+ _date_dir = Path(self.config.paths.input_folder)
337
+ # If input folder is a parent of dates, append date folder
338
+ if _date_dir.name != _date_folder:
339
+ _date_dir = _date_dir / _date_folder
340
+ _frame_path = _date_dir / _src_plant / f"{_src_plant}_frame{_frame_num}.tif"
341
+ if _frame_path.exists():
342
+ _img = _Image.open(str(_frame_path))
343
+ _src = {"raw_image": (_img, _frame_path.name), "plant_name": _plant_name, "file_path": str(_frame_path)}
344
+ else:
345
+ _src = None
346
+ except Exception:
347
+ _src = None
348
+ if not _src:
349
+ continue
350
+ _tgt = plants[_k]
351
+ # Preserve original raw image once
352
+ if 'raw_image' in _tgt and 'raw_image_original' not in _tgt:
353
+ _tgt['raw_image_original'] = _tgt['raw_image']
354
+ if 'raw_image' in _src:
355
+ _tgt['raw_image'] = _src['raw_image']
356
+ _tgt['borrowed_from'] = _src_plant
357
+ rewired += 1
358
+ if rewired > 0:
359
+ logger.info(f"Pre-seg borrowing applied: rewired {rewired} frames for plants 13/14/15/16")
360
+ except Exception as e:
361
+ logger.warning(f"Pre-seg borrowing failed: {e}")
362
+
363
+ # Step 2: Create composites
364
+ logger.info("Step 2/6: Creating composites...")
365
+ step_start = time.perf_counter()
366
+ plants = self.preprocessor.create_composites(plants)
367
+ logger.info(f"Composites done in {(time.perf_counter()-step_start):.2f}s")
368
+
369
+ # Step 3: Segment plants (optionally with bounding boxes)
370
+ logger.info("Step 3/6: Segmenting plants...")
371
+ step_start = time.perf_counter()
372
+ bbox_lookup = None
373
+ try:
374
+ bbox_dir = getattr(self.config.paths, 'boundingbox_dir', None)
375
+ # Default to project BoundingBox dir if unset or falsy
376
+ if not bbox_dir:
377
+ try:
378
+ self.config.paths.boundingbox_dir = "/home/grads/f/fahimehorvatinia/Documents/my_full_project/BoundingBox"
379
+ bbox_dir = self.config.paths.boundingbox_dir
380
+ except Exception:
381
+ bbox_dir = None
382
+ if bbox_dir:
383
+ bbox_lookup = self.data_loader.load_bounding_boxes(bbox_dir)
384
+ logger.info(f"Loaded bounding boxes from {bbox_dir}")
385
+ except Exception as e:
386
+ logger.warning(f"Failed to load bounding boxes: {e}")
387
+ bbox_lookup = None
388
+ plants = self._segment_plants(plants, bbox_lookup)
389
+ logger.info(f"Segmentation done in {(time.perf_counter()-step_start):.2f}s")
390
+
391
+ # Step 3.5: Handle occlusion if enabled
392
+ if self.enable_occlusion_handling and self.occlusion_handler is not None:
393
+ logger.info("Step 3.5/6: Handling occlusion with SAM2Long...")
394
+ step_start = time.perf_counter()
395
+ plants = self._handle_occlusion(plants)
396
+ logger.info(f"Occlusion handling done in {(time.perf_counter()-step_start):.2f}s")
397
+
398
+ # Optional: Export RMBG maskouts with white background and run instance segmentation
399
+ if (run_instance_segmentation or self.enable_instance_integration) and not reuse_instance_results:
400
+ if not load_all_frames:
401
+ logger.warning("Instance segmentation expects all 13 frames; consider running with load_all_frames=True.")
402
+ logger.info("Step 3.6: Exporting white-background RMBG images for instance segmentation...")
403
+ # Derive date-specific export/result directories when a single date is present
404
+ date_keys = set()
405
+ try:
406
+ for _k in plants.keys():
407
+ _p = _k.split('_')
408
+ if len(_p) >= 3:
409
+ date_keys.add("_".join(_p[:3]))
410
+ except Exception:
411
+ pass
412
+ if len(date_keys) == 1:
413
+ date_key = next(iter(date_keys))
414
+ base_dir = Path(self.config.paths.output_folder) / date_key
415
+ export_dir = base_dir / "instance_input_maskouts"
416
+ instance_results_dir = base_dir / "instance_results"
417
+ else:
418
+ export_dir = Path(self.config.paths.output_folder) / "instance_input_maskouts"
419
+ instance_results_dir = Path(self.config.paths.output_folder) / "instance_results"
420
+ export_dir.mkdir(parents=True, exist_ok=True)
421
+ instance_results_dir.mkdir(parents=True, exist_ok=True)
422
+ self._export_white_background_maskouts(plants, export_dir)
423
+
424
+ logger.info("Invoking final SAM2Long instance segmentation on exported images...")
425
+ # Free GPU memory before launching SAM2Long to avoid CUDA OOM
426
+ self._free_gpu_memory_before_instance()
427
+ env = os.environ.copy()
428
+ env["SAM2LONG_IMAGES_DIR"] = str(export_dir)
429
+ env["SAM2LONG_RESULTS_DIR"] = str(instance_results_dir)
430
+ # Ensure instance outputs include all frames for all dates
431
+ try:
432
+ env.pop("INSTANCE_OUTPUT_FRAMES", None)
433
+ except Exception:
434
+ pass
435
+ script_path = "/home/grads/f/fahimehorvatinia/Documents/my_full_project/Experiments3_code/sam2long_instance_integration.py"
436
+ try:
437
+ subprocess.run(["python", script_path], check=True, env=env)
438
+ except subprocess.CalledProcessError as e:
439
+ logger.error(f"Instance segmentation failed: {e}")
440
+ else:
441
+ # Integrate instance masks (track_0 as target) into pdata before feature extraction
442
+ try:
443
+ self._apply_instance_masks(plants, instance_results_dir)
444
+ logger.info("Applied instance segmentation masks to pipeline data")
445
+ except Exception as e:
446
+ logger.warning(f"Failed to apply instance masks: {e}")
447
+ elif reuse_instance_results:
448
+ # Reuse existing instance masks from mapping file
449
+ if instance_mapping_path is None:
450
+ raise ValueError("reuse_instance_results=True requires instance_mapping_path to be provided")
451
+ try:
452
+ self._apply_instance_masks_from_mapping(plants, Path(instance_mapping_path))
453
+ logger.info("Applied instance masks from mapping file")
454
+ except Exception as e:
455
+ logger.error(f"Failed to apply instance masks from mapping: {e}")
456
+
457
+ if not segmentation_only:
458
+ # If reusing instance results with a mapping, restrict features to mapped frames per plant
459
+ if reuse_instance_results and instance_mapping_path is not None:
460
+ try:
461
+ import json as _json
462
+ _map = _json.load(open(instance_mapping_path, 'r'))
463
+ # Normalize map
464
+ _norm = {}
465
+ for pk, pv in _map.items():
466
+ k_norm = pk if str(pk).startswith('plant') else f"plant{int(pk)}" if str(pk).isdigit() else str(pk)
467
+ _norm[k_norm] = int(pv.get('frame', 8))
468
+ before = len(plants)
469
+ plants = {
470
+ k: v for k, v in plants.items()
471
+ if len(k.split('_')) > 3 and k.split('_')[3] in _norm and k.split('_')[-1] == f"frame{_norm[k.split('_')[3]]}"
472
+ }
473
+ logger.info(f"Restricted feature extraction by mapping: {before} -> {len(plants)} items")
474
+ except Exception as e:
475
+ logger.warning(f"Failed to restrict by mapping frames: {e}")
476
+ # Optional: restrict features to per-plant preferred frame using internal frame rules
477
+ if respect_instance_frame_rules_for_features:
478
+ try:
479
+ # Keep this in sync with _apply_instance_masks frame_rules
480
+ frame_rules: Dict[str, int] = {
481
+ "plant33": 2,
482
+ "plant16": 4,
483
+ "plant19": 5,
484
+ "plant26": 8,
485
+ "plant27": 8,
486
+ "plant29": 8,
487
+ "plant35": 7,
488
+ "plant36": 6,
489
+ "plant37": 2,
490
+ "plant45": 5,
491
+ }
492
+ before = len(plants)
493
+ def _keep(k: str) -> bool:
494
+ parts = k.split('_')
495
+ if len(parts) < 2:
496
+ return False
497
+ plant_name = parts[-2]
498
+ frame_token = parts[-1]
499
+ if not (plant_name.startswith('plant') and frame_token.startswith('frame')):
500
+ return False
501
+ desired = frame_rules.get(plant_name, 8)
502
+ return frame_token == f"frame{desired}"
503
+ plants = {k: v for k, v in plants.items() if _keep(k)}
504
+ logger.info(f"Restricted feature extraction by per-plant frame rules: {before} -> {len(plants)} items")
505
+ except Exception as e:
506
+ logger.warning(f"Failed to apply per-plant frame restriction for features: {e}")
507
+
508
+ # Optional: if features_frame_only set, keep only that frame's entries (global single frame)
509
+ if features_frame_only is not None:
510
+ frame_token = f"frame{features_frame_only}"
511
+ plants = {k: v for k, v in plants.items() if k.split('_')[-1] == frame_token}
512
+ logger.info(f"Restricted feature extraction to {len(plants)} items for {frame_token}")
513
+
514
+ # Optional: substitute feature input image from instance src_rules mapping (e.g., plant14 <- plant13)
515
+ if substitute_feature_image_from_instance_src:
516
+ try:
517
+ src_rules: Dict[str, str] = {
518
+ "plant13": "plant12",
519
+ "plant14": "plant13",
520
+ "plant15": "plant14",
521
+ "plant16": "plant15",
522
+ }
523
+ switched = 0
524
+ for key in list(plants.keys()):
525
+ parts = key.split('_')
526
+ if len(parts) < 5:
527
+ continue
528
+ date_key = "_".join(parts[:3])
529
+ plant_name = parts[3]
530
+ frame_token = parts[-1]
531
+ if plant_name not in src_rules:
532
+ continue
533
+ src_plant = src_rules[plant_name]
534
+ src_key = f"{date_key}_{src_plant}_{frame_token}"
535
+ if src_key not in plants:
536
+ continue
537
+ src_pdata = plants[src_key]
538
+ tgt_pdata = plants[key]
539
+ # Preserve the original composite used for segmentation for correct overlays later
540
+ try:
541
+ if 'composite' in tgt_pdata and 'segmentation_composite' not in tgt_pdata:
542
+ tgt_pdata['segmentation_composite'] = tgt_pdata['composite']
543
+ except Exception:
544
+ pass
545
+ # Swap feature inputs: composite and spectral bands
546
+ if 'composite' in src_pdata:
547
+ tgt_pdata['composite'] = src_pdata['composite']
548
+ if 'spectral_stack' in src_pdata:
549
+ tgt_pdata['spectral_stack'] = src_pdata['spectral_stack']
550
+ # Ensure mask aligns with substituted composite; resize if needed
551
+ try:
552
+ import cv2 as _cv2
553
+ import numpy as _np
554
+ comp = tgt_pdata.get('composite')
555
+ msk = tgt_pdata.get('mask')
556
+ if comp is not None and msk is not None:
557
+ ch, cw = comp.shape[:2]
558
+ mh, mw = msk.shape[:2]
559
+ if (mh, mw) != (ch, cw):
560
+ resized = _cv2.resize(msk.astype('uint8'), (cw, ch), interpolation=_cv2.INTER_NEAREST)
561
+ tgt_pdata['mask'] = resized
562
+ if 'soft_mask' in tgt_pdata and isinstance(tgt_pdata['soft_mask'], _np.ndarray):
563
+ tgt_pdata['soft_mask'] = (resized > 0).astype(_np.float32)
564
+ # Precompute masked composite with white background for saving
565
+ white = _np.full_like(comp, 255, dtype=_np.uint8)
566
+ result = white.copy()
567
+ result[tgt_pdata['mask'] > 0] = comp[tgt_pdata['mask'] > 0]
568
+ tgt_pdata['masked_composite'] = result
569
+ except Exception:
570
+ pass
571
+ switched += 1
572
+ if switched > 0:
573
+ logger.info(f"Substituted feature images from src_rules for {switched} items")
574
+ except Exception as e:
575
+ logger.warning(f"Failed feature-image substitution via src_rules: {e}")
576
+ # Step 4: Extract features
577
+ logger.info("Step 4/6: Extracting features...")
578
+ step_start = time.perf_counter()
579
+ # Stream-save mode: save outputs immediately after each plant's features when fast output is enabled
580
+ stream_save = False
581
+ try:
582
+ import os as _os
583
+ stream_save = bool(int(_os.environ.get('STREAM_SAVE', '0'))) or bool(getattr(self.output_manager, 'fast_mode', False))
584
+ except Exception:
585
+ stream_save = False
586
+
587
+ plants = self._extract_features(plants, stream_save=stream_save)
588
+ logger.info(f"Features done in {(time.perf_counter()-step_start):.2f}s")
589
+
590
+ # Step 5: Generate outputs (skip if already stream-saved)
591
+ if not stream_save:
592
+ logger.info("Step 5/6: Generating outputs...")
593
+ step_start = time.perf_counter()
594
+ self._generate_outputs(plants)
595
+ logger.info(f"Outputs done in {(time.perf_counter()-step_start):.2f}s")
596
+
597
+ # Step 6: Create summary
598
+ logger.info("Step 6/6: Creating summary...")
599
+ summary = self._create_summary(plants)
600
+ else:
601
+ logger.info("Segmentation-only mode: skipping texture/vegetation/morphology features and plots")
602
+ # Segmentation-only: generate only segmentation outputs and a minimal summary
603
+ logger.info("Step 4/4: Generating segmentation outputs (segmentation-only mode)...")
604
+ self._generate_outputs(plants)
605
+ summary = {
606
+ "total_plants": len(plants),
607
+ "successful_plants": len(plants),
608
+ "failed_plants": 0,
609
+ "features_extracted": {
610
+ "texture": 0,
611
+ "vegetation": 0,
612
+ "morphology": 0
613
+ }
614
+ }
615
+
616
+ total_time = time.perf_counter() - total_start
617
+ logger.info(f"Pipeline completed successfully in {total_time:.2f}s!")
618
+ return {
619
+ "plants": plants,
620
+ "summary": summary,
621
+ "config": self.config,
622
+ "timing_seconds": total_time
623
+ }
624
+
625
+ except Exception as e:
626
+ logger.error(f"Pipeline failed: {e}")
627
+ raise
628
+
629
def _export_white_background_maskouts(self, plants: Dict[str, Any], out_dir: Path) -> None:
    """Export RMBG composites with white background using the soft/binary masks.

    Filenames follow: plantX_plantX_frameY_maskout.png so the final instance script can detect plants.

    Args:
        plants: Mapping of "YYYY_MM_DD_plantX_frameY" keys to per-plant data dicts;
            entries are expected to carry 'composite' (image) and 'mask' (binary) arrays.
        out_dir: Directory that receives the *_maskout.png files. Existing maskouts
            in it are deleted first.
    """
    # Clear any previous maskouts to avoid processing stale plants.
    # Best-effort: deletion failures are ignored so a stale file never blocks export.
    try:
        if out_dir.exists():
            for p in out_dir.glob("*_maskout.png"):
                try:
                    p.unlink()
                except Exception:
                    pass
    except Exception:
        pass
    count = 0
    # Per-plant rule: use bbox-only (skip SAM2Long) for these plants on all dates except 2025_05_08
    bbox_only_plants: Set[str] = {"plant19", "plant20", "plant27", "plant33", "plant39", "plant42", "plant44", "plant46"}
    date_exception = "2025_05_08"
    for key, pdata in plants.items():
        try:
            # key format: "YYYY_MM_DD_plantX_frameY"
            parts = key.split('_')
            if len(parts) < 3:
                continue
            plant_name = parts[-2]
            frame_token = parts[-1]  # e.g., frame8
            if not plant_name.startswith('plant') or not frame_token.startswith('frame'):
                continue
            date_key = "_".join(parts[:3])
            if (plant_name in bbox_only_plants) and (date_key != date_exception):
                # Skip exporting maskouts for bbox-only plants so SAM2Long does not run on them
                continue
            # Extract frame number
            # NOTE(review): frame_num is currently unused below (the filename reuses
            # frame_token); kept because int() also validates the token format.
            frame_num = int(frame_token.replace('frame', ''))
            composite = pdata.get('composite')
            mask = pdata.get('mask')
            if composite is None or mask is None:
                continue
            # Ensure 3-channel BGR
            if len(composite.shape) == 2:
                composite_bgr = cv2.cvtColor(composite, cv2.COLOR_GRAY2BGR)
            else:
                composite_bgr = composite
            out_img = composite_bgr.copy()
            # Set background to white where mask == 0
            out_img[mask == 0] = (255, 255, 255)
            out_path = out_dir / f"{plant_name}_{plant_name}_{frame_token}_maskout.png"
            cv2.imwrite(str(out_path), out_img)
            count += 1
        except Exception as e:
            # Per-item failures are logged and skipped so one bad plant cannot
            # abort the whole export.
            logger.warning(f"Failed to export maskout for {key}: {e}")
    logger.info(f"Exported {count} white-background maskouts to {out_dir}")
682
+
683
def _segment_plants(self, plants: Dict[str, Any],
                    bbox_lookup: Optional[Dict[str, tuple]]) -> Dict[str, Any]:
    """Segment plants using BRIA model.

    If bbox_lookup is provided and contains an entry for the plant (e.g., 'plant1'),
    the image is cropped/masked to the bounding box region before segmentation and the
    predicted mask is mapped back to the full image size. In bbox mode a largest
    connected component post-processing is applied to obtain a clean target mask.

    Args:
        plants: Mapping of "YYYY_MM_DD_plantX_frameY" keys to per-plant dicts;
            each entry must contain a 'composite' image. This dict is mutated
            in place: 'soft_mask' (float32 in [0, 1]) and 'mask' (uint8 0/255)
            are written for every entry.
        bbox_lookup: Optional mapping of plant name ('plantX') to an
            (x1, y1, x2, y2) bounding box.

    Returns:
        The same plants dict, mutated with segmentation results.
    """
    total = len(plants)
    iterator = plants.items()
    if tqdm is not None:
        iterator = tqdm(list(plants.items()), desc="Segmenting", total=total, unit="img", leave=False)
    for idx, (key, pdata) in enumerate(iterator):
        try:
            # Get composite image
            composite = pdata['composite']
            h, w = composite.shape[:2]

            # Determine bbox for this plant if available
            parts = key.split('_')
            plant_name = parts[-2] if len(parts) >= 2 else None
            date_key = "_".join(parts[:3]) if len(parts) >= 3 else None  # e.g., 2025_04_16
            bbox = None
            if bbox_lookup is not None and plant_name is not None:
                # keys in bbox_lookup are typically like 'plant1'
                bbox = bbox_lookup.get(plant_name)
            # For plant33, ignore any bbox and run full-image segmentation on all dates except the exception
            if plant_name == 'plant33' and date_key != '2025_05_08':
                bbox = None

            # Plants that should use the bounding box itself as the mask (skip model)
            bbox_only_plants: Set[str] = {"plant19", "plant20", "plant27", "plant39", "plant42", "plant44", "plant46"}
            use_bbox_only = (plant_name in bbox_only_plants)

            # Do not use bounding boxes for date 2025_05_08
            if date_key == '2025_05_08':
                bbox = None

            if bbox is not None:
                # Clamp bbox to image; a degenerate (empty) box falls back to the full frame.
                x1, y1, x2, y2 = bbox
                x1 = max(0, min(w, int(x1)))
                x2 = max(0, min(w, int(x2)))
                y1 = max(0, min(h, int(y1)))
                y2 = max(0, min(h, int(y2)))
                if x2 <= x1 or y2 <= y1:
                    x1, y1, x2, y2 = 0, 0, w, h

                if use_bbox_only:
                    # Use the bbox as the mask directly (255 inside, 0 outside)
                    soft_full = np.zeros((h, w), dtype=np.float32)
                    soft_full[y1:y2, x1:x2] = 1.0
                    bin_full = np.zeros((h, w), dtype=np.uint8)
                    bin_full[y1:y2, x1:x2] = 255
                    pdata['soft_mask'] = soft_full
                    pdata['mask'] = bin_full
                else:
                    # Segment inside the bbox region and map back
                    crop = composite[y1:y2, x1:x2]
                    soft_mask_crop = self.segmentation_manager.segment_image_soft(crop)
                    soft_full = np.zeros((h, w), dtype=np.float32)
                    # Resize back to the exact bbox size in case the model changed resolution.
                    soft_resized = cv2.resize(soft_mask_crop, (x2 - x1, y2 - y1), interpolation=cv2.INTER_LINEAR)
                    soft_full[y1:y2, x1:x2] = soft_resized
                    bin_full = (soft_full > 0.5).astype(np.uint8) * 255
                    # Keep only the largest connected component to drop spurious blobs.
                    try:
                        n_lbl, labels, stats, _ = cv2.connectedComponentsWithStats(bin_full, 8)
                        if n_lbl > 1:
                            # Label 0 is background; offset argmax over the remaining labels.
                            largest = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
                            bin_full = (labels == largest).astype(np.uint8) * 255
                    except Exception:
                        pass
                    pdata['soft_mask'] = soft_full.astype(np.float32)
                    pdata['mask'] = bin_full.astype(np.uint8)
            else:
                # Full-image segmentation (no bbox)
                soft_mask = self.segmentation_manager.segment_image_soft(composite)
                pdata['soft_mask'] = soft_mask
                pdata['mask'] = (soft_mask * 255.0).astype(np.uint8)

            # Progress log every 25 items and for first/last
            if tqdm is None and (idx == 0 or (idx + 1) % 25 == 0 or (idx + 1) == total):
                logger.info(f"Segmented {idx + 1}/{total}: {key}")

        except Exception as e:
            # NOTE(review): if pdata['composite'] itself raised (e.g. KeyError),
            # `composite` is unbound here and this handler would raise NameError —
            # confirm every entry carries a 'composite' before relying on this fallback.
            logger.error(f"Segmentation failed for {key}: {e}")
            pdata['soft_mask'] = np.zeros(composite.shape[:2], dtype=np.float32)
            pdata['mask'] = np.zeros(composite.shape[:2], dtype=np.uint8)

    return plants
773
+
774
def _handle_occlusion(self, plants: Dict[str, Any]) -> Dict[str, Any]:
    """
    Handle occlusion problems using SAM2Long.

    This method groups plants by their base plant ID and processes
    each plant's 13-frame sequence to differentiate target plant
    from neighboring plants.

    Args:
        plants: Dictionary of plant data keyed either by the date-prefixed
            form "YYYY_MM_DD_plantX_frameY" (used elsewhere in this module)
            or the legacy "plantX_plantX_frameY" form.

    Returns:
        Updated plant data with occlusion handling results
    """
    if self.occlusion_handler is None:
        logger.warning("Occlusion handler not available, skipping occlusion handling")
        return plants

    # Group plants by base plant ID (e.g., "plant1").
    plant_groups = {}
    for key, pdata in plants.items():
        parts = key.split('_')
        if len(parts) >= 3:
            # Fix: keys produced by this pipeline are date-prefixed
            # ("YYYY_MM_DD_plantX_frameY"), so parts[0] is the year and would
            # collapse every entry into a single group. Use the first token
            # that names a plant; fall back to parts[0] for legacy
            # "plantX_plantX_frameY" keys (where it equals the plant ID anyway).
            plant_id = next((p for p in parts if p.startswith('plant')), parts[0])
            plant_groups.setdefault(plant_id, []).append((key, pdata))

    logger.info(f"Processing {len(plant_groups)} plant groups for occlusion handling")

    # Process each plant group independently; one failed group does not stop the rest.
    for plant_id, plant_frames in plant_groups.items():
        try:
            # Sort frames by frame number (last key token is "frameN")
            plant_frames.sort(key=lambda x: int(x[0].split('_')[-1].replace('frame', '')))

            if len(plant_frames) < 2:
                # SAM2Long needs a sequence; a single frame carries no temporal signal.
                logger.warning(f"Plant {plant_id} has only {len(plant_frames)} frames, skipping")
                continue

            # Extract frames and keys
            frame_keys = [x[0] for x in plant_frames]
            frames = [x[1]['composite'] for x in plant_frames]

            logger.info(f"Processing plant {plant_id} with {len(frames)} frames")

            # Process with SAM2Long
            occlusion_results = self.occlusion_handler.segment_plant_sequence(
                frames=frames,
                target_plant_id=plant_id
            )

            # Update plant data with occlusion results
            target_masks = occlusion_results['target_masks']
            neighbor_masks = occlusion_results['neighbor_masks']

            for i, (key, pdata) in enumerate(plant_frames):
                if i < len(target_masks):
                    # Update mask with target plant only; preserve the previous
                    # mask so downstream code can compare/intersect.
                    pdata['original_mask'] = pdata.get('mask', np.zeros_like(target_masks[i]))
                    pdata['mask'] = target_masks[i]
                    pdata['neighbor_mask'] = neighbor_masks[i]
                    pdata['occlusion_handled'] = True

                    # Update soft mask as well (binary 0/255 -> float [0, 1])
                    pdata['original_soft_mask'] = pdata.get('soft_mask', np.zeros_like(target_masks[i], dtype=np.float32))
                    pdata['soft_mask'] = (target_masks[i] / 255.0).astype(np.float32)

            # Calculate and store occlusion metrics on every frame of the group
            metrics = self.occlusion_handler.get_occlusion_metrics(occlusion_results)
            for key, pdata in plant_frames:
                pdata['occlusion_metrics'] = metrics

            logger.info(f"Plant {plant_id} occlusion handling completed")
            logger.info(f"  - Average occlusion ratio: {metrics['average_occlusion_ratio']:.3f}")
            logger.info(f"  - Frames with occlusion: {metrics['frames_with_occlusion']}")

        except Exception as e:
            logger.error(f"Occlusion handling failed for plant {plant_id}: {e}")
            # Mark as failed but continue
            for key, pdata in plant_frames:
                pdata['occlusion_handled'] = False
                pdata['occlusion_error'] = str(e)

    return plants
860
+
861
def _extract_features(self, plants: Dict[str, Any], stream_save: bool = False) -> Dict[str, Any]:
    """Extract all features from plants.

    If stream_save is True, save outputs for each plant immediately after
    its features are computed to improve throughput and reduce peak memory.

    Args:
        plants: Mapping of plant key to per-plant data dict; mutated in place
            with 'texture_features', 'vegetation_indices' and
            'morphology_features' entries (empty dicts on failure).
        stream_save: When True, each plant's results are written via the
            output manager right after its features are computed.

    Returns:
        The same plants dict, mutated with feature results.
    """
    total = len(plants)
    logger.info(f"Extracting features for {total} plants...")
    iterator = plants.items()
    if tqdm is not None:
        iterator = tqdm(list(plants.items()), desc="Extracting features", total=total, unit="img", leave=False)

    # Prepare output directories once if we're streaming saves
    # (best-effort: directory creation failure falls through to per-plant save errors).
    if stream_save:
        try:
            self.output_manager.create_output_directories()
        except Exception:
            pass

    for idx, (key, pdata) in enumerate(iterator):
        try:
            logger.debug(f"Extracting features for {key}")

            # Extract texture features
            pdata['texture_features'] = self._extract_texture_features(pdata)

            # Extract vegetation indices
            pdata['vegetation_indices'] = self._extract_vegetation_indices(pdata)

            # Extract morphological features
            pdata['morphology_features'] = self._extract_morphology_features(pdata)

            # Immediately save outputs for this plant if streaming is enabled;
            # a save failure is logged but does not discard the computed features.
            if stream_save:
                try:
                    self.output_manager.save_plant_results(key, pdata)
                except Exception as _e:
                    logger.error(f"Stream-save failed for {key}: {_e}")

            logger.debug(f"Features extracted for {key}")
            # When tqdm is unavailable, log progress every 25 items and for first/last.
            if tqdm is None and (idx == 0 or (idx + 1) % 25 == 0 or (idx + 1) == total):
                logger.info(f"Extracted features for {idx + 1}/{total}: {key}")

        except Exception as e:
            logger.error(f"Feature extraction failed for {key}: {e}")
            # Add empty features so downstream consumers always find the keys.
            pdata['texture_features'] = {}
            pdata['vegetation_indices'] = {}
            pdata['morphology_features'] = {}

    return plants
912
+
913
def _extract_texture_features(self, pdata: Dict[str, Any]) -> Dict[str, Any]:
    """Compute per-band texture features for one plant.

    For each band ('color', 'nir', 'red_edge', 'red', 'green', 'pca') a
    grayscale image is prepared, texture maps are extracted, and summary
    statistics are computed over the preferred mask
    (mask3 -> features_mask -> mask). A band that fails contributes empty
    'features'/'statistics' dicts instead of aborting the plant.
    """
    results: Dict[str, Any] = {}

    for band in ('color', 'nir', 'red_edge', 'red', 'green', 'pca'):
        try:
            # Grayscale input for this band, then the full texture map set.
            gray = self._prepare_band_image(pdata, band)
            texture_maps = self.texture_extractor.extract_all_texture_features(gray)

            # Statistics are restricted to the preferred mask chain.
            roi = pdata.get('mask3', pdata.get('features_mask', pdata.get('mask')))
            results[band] = {
                'features': texture_maps,
                'statistics': self.texture_extractor.compute_texture_statistics(texture_maps, roi),
            }
        except Exception as e:
            logger.error(f"Texture extraction failed for band {band}: {e}")
            results[band] = {'features': {}, 'statistics': {}}

    return results
942
+
943
def _extract_vegetation_indices(self, pdata: Dict[str, Any]) -> Dict[str, Any]:
    """Compute vegetation indices from the spectral stack and preferred mask.

    Returns an empty dict when the spectral stack or mask is missing, or
    when the extractor raises.
    """
    try:
        stack = pdata.get('spectral_stack', {})
        # Mask preference chain: mask3 -> features_mask -> mask
        roi = pdata.get('mask3', pdata.get('features_mask', pdata.get('mask')))

        if roi is None or not stack:
            return {}

        return self.vegetation_extractor.compute_vegetation_indices(stack, roi)

    except Exception as e:
        logger.error(f"Vegetation index extraction failed: {e}")
        return {}
960
+
961
def _extract_morphology_features(self, pdata: Dict[str, Any]) -> Dict[str, Any]:
    """Compute morphological features from the composite image and preferred mask.

    Returns an empty dict when the composite or mask is missing, or when the
    extractor raises.
    """
    try:
        image = pdata.get('composite')
        # Mask preference chain: mask3 -> features_mask -> mask
        roi = pdata.get('mask3', pdata.get('features_mask', pdata.get('mask')))

        if image is None or roi is None:
            return {}

        return self.morphology_extractor.extract_morphology_features(image, roi)

    except Exception as e:
        logger.error(f"Morphology feature extraction failed: {e}")
        return {}
978
+
979
def _prepare_band_image(self, pdata: Dict[str, Any], band: str) -> np.ndarray:
    """Prepare grayscale image for a specific band.

    Args:
        pdata: Per-plant data dict with 'composite' and optional
            'spectral_stack' / mask entries.
        band: One of 'color', 'pca', or a spectral band name present in
            the spectral stack ('nir', 'red_edge', 'red', 'green').

    Returns:
        A uint8 grayscale image; a zero image (512x512 when no source data
        exists) on missing inputs. Normalization min/max are taken inside
        the mask when one is available.
    """
    if band == 'color':
        composite = pdata['composite']
        # Prefer mask3 → features_mask → mask
        mask = pdata.get('mask3', pdata.get('features_mask', pdata.get('mask')))
        if mask is not None:
            masked = self.mask_handler.apply_mask_to_image(composite, mask)
            return cv2.cvtColor(masked, cv2.COLOR_BGR2GRAY)
        else:
            return cv2.cvtColor(composite, cv2.COLOR_BGR2GRAY)

    elif band == 'pca':
        # Create PCA from spectral bands
        spectral_stack = pdata.get('spectral_stack', {})
        # Prefer mask3 → features_mask → mask
        mask = pdata.get('mask3', pdata.get('features_mask', pdata.get('mask')))

        if not spectral_stack:
            return np.zeros((512, 512), dtype=np.uint8)

        # Stack bands; background pixels become NaN so PCA only sees plant pixels.
        # assumes each spectral band is stored as an (H, W, 1) array — TODO confirm
        bands_data = []
        for b in ['nir', 'red_edge', 'red', 'green']:
            if b in spectral_stack:
                arr = spectral_stack[b].squeeze(-1).astype(float)
                if mask is not None:
                    arr = np.where(mask > 0, arr, np.nan)
                bands_data.append(arr)

        if not bands_data:
            return np.zeros((512, 512), dtype=np.uint8)

        # Create PCA: project the valid (non-NaN) pixels onto the first component.
        full_stack = np.stack(bands_data, axis=-1)
        h, w, c = full_stack.shape
        flat = full_stack.reshape(-1, c)
        valid = ~np.isnan(flat).any(axis=1)

        if valid.sum() == 0:
            return np.zeros((h, w), dtype=np.uint8)

        vec = np.zeros(h * w)
        vec[valid] = PCA(n_components=1, whiten=True).fit_transform(
            flat[valid]
        ).squeeze()

        # Min-max normalize to uint8 using in-mask extrema when a mask exists.
        gray_f = vec.reshape(h, w)
        if mask is not None:
            m, M = gray_f[mask > 0].min(), gray_f[mask > 0].max()
        else:
            m, M = gray_f.min(), gray_f.max()

        # Flat (constant) images map to all-zero to avoid division by zero.
        if M > m:
            gray = ((gray_f - m) / (M - m) * 255).astype(np.uint8)
        else:
            gray = np.zeros_like(gray_f, dtype=np.uint8)

        return gray

    else:
        # Individual spectral band
        spectral_stack = pdata.get('spectral_stack', {})
        # Prefer mask3 → features_mask → mask
        mask = pdata.get('mask3', pdata.get('features_mask', pdata.get('mask')))

        if band not in spectral_stack:
            return np.zeros((512, 512), dtype=np.uint8)

        arr = spectral_stack[band].squeeze(-1).astype(float)
        if mask is not None:
            # Background pixels become NaN and are excluded from min/max below.
            arr = np.where(mask > 0, arr, np.nan)

        if mask is not None:
            m, M = np.nanmin(arr), np.nanmax(arr)
        else:
            m, M = arr.min(), arr.max()

        # Normalize to uint8; NaN background maps to the minimum (0 after scaling).
        if M > m:
            gray = ((np.nan_to_num(arr, nan=m) - m) / (M - m) * 255).astype(np.uint8)
        else:
            gray = np.zeros_like(arr, dtype=np.uint8)

        return gray
1063
+
1064
def _generate_outputs(self, plants: Dict[str, Any]) -> None:
    """Generate all output files and visualizations.

    Creates the output directory tree once, then saves each plant's results
    via the output manager. Per-plant failures are logged and skipped so one
    bad plant never blocks the rest.

    Args:
        plants: Mapping of plant key to per-plant data dict.
    """
    self.output_manager.create_output_directories()

    for key, pdata in plants.items():
        try:
            logger.debug(f"Generating outputs for {key}")
            self.output_manager.save_plant_results(key, pdata)
        except Exception as e:
            logger.error(f"Output generation failed for {key}: {e}")
1074
+
1075
+ def _create_summary(self, plants: Dict[str, Any]) -> Dict[str, Any]:
1076
+ """Create summary of pipeline results."""
1077
+ summary = {
1078
+ "total_plants": len(plants),
1079
+ "successful_plants": 0,
1080
+ "failed_plants": 0,
1081
+ "features_extracted": {
1082
+ "texture": 0,
1083
+ "vegetation": 0,
1084
+ "morphology": 0
1085
+ }
1086
+ }
1087
+
1088
+ for key, pdata in plants.items():
1089
+ try:
1090
+ # Check if features were extracted
1091
+ if pdata.get('texture_features'):
1092
+ summary["features_extracted"]["texture"] += 1
1093
+ if pdata.get('vegetation_indices'):
1094
+ summary["features_extracted"]["vegetation"] += 1
1095
+ if pdata.get('morphology_features'):
1096
+ summary["features_extracted"]["morphology"] += 1
1097
+
1098
+ summary["successful_plants"] += 1
1099
+
1100
+ except Exception:
1101
+ summary["failed_plants"] += 1
1102
+
1103
+ return summary
1104
+
1105
def _apply_instance_masks(self, plants: Dict[str, Any], instance_results_dir: Path) -> None:
    """Replace segmentation masks with SAM2Long instance masks using track_1.

    Expects files under instance_results_dir/plantX/track_1/frame_YY_mask.png.

    Mutates each eligible entry in `plants` in place: the BRIA mask/soft mask
    are preserved under 'original_mask'/'original_soft_mask', replaced by the
    instance mask, and 'mask3' (instance AND BRIA intersection) is stored for
    feature extraction. For plants with a source-plant override the composite
    and spectral stack are also swapped to the source plant's image.
    """
    # Default and per-plant overrides for source plant, track and preferred frame
    default_track = "track_0"
    # Plants whose mask/image come from a *different* plant's results directory.
    src_rules: Dict[str, str] = {
        "plant13": "plant12",
        "plant14": "plant13",
        "plant15": "plant14",
        "plant16": "plant15",
    }
    track_rules: Dict[str, str] = {
        # explicit track rules
        "plant1": "track_0",
        "plant4": "track_0",
        "plant9": "track_3",
        "plant13": "track_1",
        "plant14": "track_0",
        "plant15": "track_0",
        "plant16": "track_0",
        "plant18": "track_0",
        "plant19": "track_0",
        "plant23": "track_1",
        "plant26": "track_0",
        "plant27": "track_0",
        "plant29": "track_0",
        "plant31": "track_1",
        "plant34": "track_1",
        "plant35": "track_1",
        "plant36": "track_0",
        "plant37": "track_1",
        "plant38": "track_0",
        "plant39": "track_1",
        "plant40": "track_0",
        "plant41": "track_1",
        "plant42": "track_0",
        "plant43": "track_0",
        "plant45": "track_0",
    }
    frame_rules: Dict[str, int] = {
        # preferred frame overrides (1-based)
        "plant13": 8,
        "plant14": 8,
        "plant15": 8,
        "plant33": 2,
        "plant16": 4,
        "plant19": 5,
        "plant26": 8,
        "plant27": 8,
        "plant29": 8,
        "plant35": 7,
        "plant36": 6,
        "plant37": 2,
        "plant45": 5,
    }
    # Per-plant rule: skip applying instance masks (keep bbox/BRIA mask) on all dates except 2025_05_08
    bbox_only_plants: Set[str] = {"plant19", "plant20", "plant27", "plant33", "plant39", "plant42", "plant44", "plant46"}
    date_exception = "2025_05_08"

    for key, pdata in plants.items():
        try:
            # key format: "YYYY_MM_DD_plantX_frameY"
            parts = key.split('_')
            if len(parts) < 3:
                continue
            plant_name = parts[-2]
            frame_token = parts[-1]  # frame8
            if not (plant_name.startswith('plant') and frame_token.startswith('frame')):
                continue
            date_key = "_".join(parts[:3])
            if (plant_name in bbox_only_plants) and (date_key != date_exception):
                # Do not override masks for bbox-only plants
                continue
            frame_num = int(frame_token.replace('frame', ''))
            # Resolve source plant, track and desired frame
            src_plant = src_rules.get(plant_name, plant_name)
            track_name = track_rules.get(plant_name, default_track)
            desired_frame = frame_rules.get(plant_name, frame_num)
            plant_dir = Path(instance_results_dir) / src_plant / track_name
            mask_path = plant_dir / f"frame_{desired_frame:02d}_mask.png"
            if not mask_path.exists():
                # Fallback to current frame if override not found
                fallback = plant_dir / f"frame_{frame_num:02d}_mask.png"
                if fallback.exists():
                    mask_path = fallback
                else:
                    # Last-resort: pick any available frame mask in the track directory
                    try:
                        candidates = sorted(plant_dir.glob("frame_*_mask.png"))
                        if len(candidates) > 0:
                            mask_path = candidates[0]
                        else:
                            continue
                    except Exception:
                        continue
            inst_mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
            if inst_mask is None:
                continue
            # Ensure binary uint8 0/255
            inst_mask_bin = (inst_mask > 0).astype(np.uint8) * 255
            # Preserve BRIA results before overwriting with the instance mask.
            pdata['original_mask'] = pdata.get('mask', inst_mask_bin.copy())
            pdata['mask'] = inst_mask_bin
            pdata['original_soft_mask'] = pdata.get('soft_mask', (inst_mask_bin / 255.0).astype(np.float32))
            pdata['soft_mask'] = (inst_mask_bin / 255.0).astype(np.float32)
            pdata['instance_applied'] = True

            # Build mask3 = external(mask) AND BRIA(original_mask)
            try:
                _m1 = pdata.get('mask')
                _m2 = pdata.get('original_mask')
                if isinstance(_m1, np.ndarray) and isinstance(_m2, np.ndarray):
                    _m1b = (_m1.astype(np.uint8) > 0)
                    _m2b = (_m2.astype(np.uint8) > 0)
                    mask3 = (_m1b & _m2b).astype(np.uint8) * 255
                    pdata['mask3'] = mask3
                    pdata['features_mask'] = mask3
            except Exception:
                pass

            # After applying instance masks, also overwrite the composite and spectral stack
            # with the source plant's raw image (desired frame preferred) so that
            # feature extraction and saved originals/overlays are consistent with the mask source.
            try:
                if plant_name in src_rules:
                    date_key = "_".join(parts[:3])
                    src_key_desired = f"{date_key}_{src_plant}_frame{desired_frame}"
                    src_key_same = f"{date_key}_{src_plant}_{frame_token}"
                    copy_from = plants.get(src_key_desired) or plants.get(src_key_same)
                    if copy_from is None:
                        # Fallback: load source composite from filesystem if not present in plants dict
                        try:
                            from PIL import Image as _Image
                            _date_folder = date_key.replace('_', '-')
                            _date_dir = Path(self.config.paths.input_folder)
                            if _date_dir.name != _date_folder:
                                _date_dir = _date_dir / _date_folder
                            _frame_path = _date_dir / src_plant / f"{src_plant}_frame{desired_frame}.tif"
                            if not _frame_path.exists():
                                _frame_path = _date_dir / src_plant / f"{src_plant}_frame{frame_num}.tif"
                            if _frame_path.exists():
                                _img = _Image.open(str(_frame_path))
                                # Process to composite using preprocessor
                                comp, spec = self.preprocessor.process_raw_image(_img)
                                copy_from = {"composite": comp, "spectral_stack": spec}
                        except Exception:
                            copy_from = None
                    if copy_from is not None:
                        # Preserve the segmentation-time composite once
                        if 'composite' in pdata and 'segmentation_composite' not in pdata:
                            pdata['segmentation_composite'] = pdata['composite']
                        if 'composite' in copy_from:
                            pdata['composite'] = copy_from['composite']
                        if 'spectral_stack' in copy_from:
                            pdata['spectral_stack'] = copy_from['spectral_stack']
                        # Ensure mask size matches the copied composite
                        ch, cw = pdata['composite'].shape[:2]
                        mh, mw = pdata['mask'].shape[:2]
                        if (mh, mw) != (ch, cw):
                            pdata['mask'] = cv2.resize(pdata['mask'].astype('uint8'), (cw, ch), interpolation=cv2.INTER_NEAREST)
                            pdata['soft_mask'] = (pdata['mask'] > 0).astype(np.float32)
            except Exception:
                pass
        except Exception as e:
            # Per-entry failures are deliberately non-fatal (debug level).
            logger.debug(f"Instance mask apply failed for {key}: {e}")
1270
+
1271
def _apply_instance_masks_from_mapping(self, plants: Dict[str, Any], mapping_file: Path) -> None:
    """Apply instance masks using an explicit mapping file with absolute paths.

    mapping JSON structure:
        {
          "plant1": {"frame": 8, "mask_path": "/abs/path/to/plant1/track_X/frame_08_mask.png"},
          "plant2": {"frame": 8, "mask_path": "/abs/path/.../frame_08_mask.png"},
          ...
        }
    If a plant's mapping specifies a different frame, only entries matching that frame are updated.

    Raises:
        FileNotFoundError: If mapping_file does not exist.
    """
    import json
    if not mapping_file.exists():
        raise FileNotFoundError(f"Mapping file not found: {mapping_file}")
    with open(mapping_file, "r") as f:
        mapping = json.load(f)
    # Normalize mapping plant keys to names like 'plantX'
    # (accepts "plantX", a bare digit string like "3", or any other key verbatim).
    norm_map = {}
    for k, v in mapping.items():
        k_norm = k if str(k).startswith("plant") else f"plant{int(k)}" if str(k).isdigit() else str(k)
        norm_map[k_norm] = v

    for key, pdata in plants.items():
        try:
            # key format: "YYYY_MM_DD_plantX_frameY"
            parts = key.split('_')
            if len(parts) < 3:
                continue
            plant_name = parts[-2]
            frame_token = parts[-1]
            if not (plant_name.startswith('plant') and frame_token.startswith('frame')):
                continue
            frame_num = int(frame_token.replace('frame', ''))
            if plant_name not in norm_map:
                continue
            entry = norm_map[plant_name]
            target_frame = int(entry.get("frame", frame_num))
            if frame_num != target_frame:
                # Only update the designated frame for this plant
                continue
            mask_path_str = entry.get("mask_path")
            if not mask_path_str:
                continue
            mask_path = Path(mask_path_str)
            if not mask_path.exists():
                logger.warning(f"Mask path not found for {plant_name} {frame_token}: {mask_path}")
                continue
            inst_mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
            if inst_mask is None:
                continue
            # Binarize to uint8 0/255 and preserve previous masks before overwriting.
            inst_mask_bin = (inst_mask > 0).astype(np.uint8) * 255
            pdata['original_mask'] = pdata.get('mask', inst_mask_bin.copy())
            pdata['mask'] = inst_mask_bin
            pdata['original_soft_mask'] = pdata.get('soft_mask', (inst_mask_bin / 255.0).astype(np.float32))
            pdata['soft_mask'] = (inst_mask_bin / 255.0).astype(np.float32)
            pdata['instance_applied'] = True

            # Build mask3 = external(mask) AND BRIA(original_mask)
            try:
                _m1 = pdata.get('mask')
                _m2 = pdata.get('original_mask')
                if isinstance(_m1, np.ndarray) and isinstance(_m2, np.ndarray):
                    _m1b = (_m1.astype(np.uint8) > 0)
                    _m2b = (_m2.astype(np.uint8) > 0)
                    mask3 = (_m1b & _m2b).astype(np.uint8) * 255
                    pdata['mask3'] = mask3
                    pdata['features_mask'] = mask3
            except Exception:
                pass
        except Exception as e:
            # Per-entry failures are deliberately non-fatal (debug level).
            logger.debug(f"Instance mapping apply failed for {key}: {e}")
1341
+
1342
+
1343
def run_pipeline(config_path: str, load_all_frames: bool = False, segmentation_only: bool = False, filter_plants: Optional[List[str]] = None) -> Dict[str, Any]:
    """
    Convenience function to run the pipeline.

    Args:
        config_path: Path to configuration file
        load_all_frames: Whether to load all frames or selected frames
        segmentation_only: If True, run segmentation only and skip feature extraction
        filter_plants: Optional list of plant names (e.g., ["plant1"]) to restrict
            processing to; None processes all plants

    Returns:
        Pipeline results
    """
    pipeline = SorghumPipeline(config_path)
    return pipeline.run(load_all_frames, segmentation_only, filter_plants)
1357
+
1358
+
1359
if __name__ == "__main__":
    import sys

    # CLI: [config_path] [--all] [--seg-only] [--plant=<name>]
    # Fix: the config path is the first NON-flag argument. Previously
    # `sys.argv[1]` was used unconditionally, so invocations like
    # `python pipeline.py --all` treated "--all" as the config path.
    positional = [a for a in sys.argv[1:] if not a.startswith("--")]
    config_path = positional[0] if positional else "config.yml"
    load_all = "--all" in sys.argv
    seg_only = "--seg-only" in sys.argv
    # Basic arg parse for --plant=<name>
    plant_filter = None
    for arg in sys.argv[1:]:
        if arg.startswith("--plant="):
            plant_filter = [arg.split("=", 1)[1]]

    try:
        results = run_pipeline(config_path, load_all, seg_only, plant_filter)
        print("Pipeline completed successfully!")
        print(f"Processed {results['summary']['total_plants']} plants")
    except Exception as e:
        print(f"Pipeline failed: {e}")
        sys.exit(1)
sorghum_pipeline/segmentation/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Segmentation modules for the Sorghum Pipeline.
3
+
4
+ This package contains segmentation functionality including:
5
+ - BRIA model integration
6
+ - Mask post-processing
7
+ - Segmentation validation
8
+ """
9
+
10
+ from .manager import SegmentationManager
11
+
12
+ __all__ = ["SegmentationManager"]
sorghum_pipeline/segmentation/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (482 Bytes). View file
 
sorghum_pipeline/segmentation/__pycache__/advanced_occlusion_handler.cpython-312.pyc ADDED
Binary file (26.3 kB). View file
 
sorghum_pipeline/segmentation/__pycache__/leaf_occlusion_handler.cpython-312.pyc ADDED
Binary file (27 kB). View file
 
sorghum_pipeline/segmentation/__pycache__/manager.cpython-312.pyc ADDED
Binary file (14.5 kB). View file
 
sorghum_pipeline/segmentation/__pycache__/occlusion_handler.cpython-312.pyc ADDED
Binary file (20.2 kB). View file
 
sorghum_pipeline/segmentation/manager.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Segmentation manager for the Sorghum Pipeline.
3
+
4
+ This module handles image segmentation using the BRIA model
5
+ and provides post-processing capabilities.
6
+ """
7
+
8
+ import numpy as np
9
+ import cv2
10
+ import torch
11
+ from PIL import Image
12
+ from torchvision import transforms
13
+ from transformers import AutoModelForImageSegmentation
14
+ from typing import Optional, Tuple
15
+ import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class SegmentationManager:
    """Manages image segmentation using the BRIA background-removal model."""

    def __init__(self,
                 model_name: str = "briaai/RMBG-2.0",
                 device: str = "auto",
                 threshold: float = 0.5,
                 trust_remote_code: bool = True,
                 cache_dir: Optional[str] = None,
                 local_files_only: bool = False):
        """
        Initialize segmentation manager and load the model immediately.

        Args:
            model_name: Name of the BRIA model on the Hugging Face hub
            device: Device to run model on ("auto", "cpu", "cuda");
                "auto" picks CUDA when available
            threshold: Probability threshold for binarizing the soft mask
            trust_remote_code: Whether to trust remote code from the hub
            cache_dir: Hugging Face cache directory for model weights
            local_files_only: If True, only load from local cache

        Raises:
            Exception: re-raised from _load_model if the model cannot be
                downloaded or initialized.
        """
        self.model_name = model_name
        self.threshold = threshold
        self.trust_remote_code = trust_remote_code
        self.cache_dir = cache_dir
        self.local_files_only = local_files_only

        # Resolve "auto" to a concrete device at construction time.
        if device == "auto":
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Populated by _load_model(); kept as attributes so helpers can
        # check readiness via `self.model is None`.
        self.model = None
        self.transform = None
        self._load_model()

    def _load_model(self):
        """Load the BRIA segmentation model and build its input transform."""
        try:
            logger.info(f"Loading BRIA model: {self.model_name}")

            self.model = AutoModelForImageSegmentation.from_pretrained(
                self.model_name,
                trust_remote_code=self.trust_remote_code,
                cache_dir=self.cache_dir if self.cache_dir else None,
                local_files_only=self.local_files_only,
            ).eval().to(self.device)

            # BRIA expects 1024x1024 RGB inputs normalized with ImageNet
            # statistics.
            self.transform = transforms.Compose([
                transforms.Resize((1024, 1024)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])

            logger.info("BRIA model loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load BRIA model: {e}")
            raise

    def segment_image(self, image: np.ndarray) -> np.ndarray:
        """
        Segment an image using the BRIA model.

        Args:
            image: Input image (BGR format, as read by OpenCV)

        Returns:
            Binary uint8 mask (0/255) with the same H x W as the input.
            On failure an all-zero mask is returned instead of raising.

        Raises:
            RuntimeError: If the model has not been loaded.
        """
        if self.model is None:
            raise RuntimeError("Model not loaded")

        try:
            # Model expects RGB; OpenCV delivers BGR.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(rgb_image)

            # Resize/normalize and add a batch dimension.
            input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)

            # The model returns a list of side outputs; the last one is the
            # final prediction.
            with torch.no_grad():
                predictions = self.model(input_tensor)[-1].sigmoid().cpu()[0].squeeze(0).numpy()

            # Binarize at the configured probability threshold.
            mask = (predictions > self.threshold).astype(np.uint8) * 255

            # Resize back to the original resolution; nearest-neighbor keeps
            # the mask strictly binary.
            original_size = (image.shape[1], image.shape[0])  # (width, height)
            mask_resized = cv2.resize(mask, original_size, interpolation=cv2.INTER_NEAREST)

            return mask_resized

        except Exception as e:
            logger.error(f"Segmentation failed: {e}")
            # Best-effort contract: return an empty mask rather than raising.
            return np.zeros(image.shape[:2], dtype=np.uint8)

    def segment_image_soft(self, image: np.ndarray) -> np.ndarray:
        """
        Segment an image and return a soft mask in [0, 1] resized to original size.
        No thresholding or post-processing is applied.

        Args:
            image: Input image (BGR format)

        Returns:
            Float32 mask in [0, 1] with shape (H, W); all-zero on failure.

        Raises:
            RuntimeError: If the model has not been loaded.
        """
        if self.model is None:
            raise RuntimeError("Model not loaded")
        try:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(rgb_image)
            input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)
            with torch.no_grad():
                preds = self.model(input_tensor)[-1].sigmoid().cpu()[0].squeeze(0).numpy()
            original_size = (image.shape[1], image.shape[0])
            # Bilinear interpolation keeps the probabilities smooth; clip to
            # guard against interpolation overshoot.
            soft_mask = cv2.resize(preds.astype(np.float32), original_size, interpolation=cv2.INTER_LINEAR)
            return np.clip(soft_mask, 0.0, 1.0)
        except Exception as e:
            logger.error(f"Soft segmentation failed: {e}")
            return np.zeros(image.shape[:2], dtype=np.float32)

    def post_process_mask(self, mask: np.ndarray,
                          min_area: int = 1000,
                          kernel_size: int = 5) -> np.ndarray:
        """
        Post-process segmentation mask: denoise, then drop small blobs.

        Args:
            mask: Input binary mask (0/255)
            min_area: Minimum pixel area for connected components to keep
            kernel_size: Kernel size for the morphological opening

        Returns:
            Post-processed mask; the input mask unchanged on failure.
        """
        try:
            # Morphological opening removes speckle noise.
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
            opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

            # Keep only connected components at or above min_area.
            num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
                opened, connectivity=8
            )

            processed_mask = np.zeros_like(opened)
            for label in range(1, num_labels):  # label 0 is the background
                if stats[label, cv2.CC_STAT_AREA] >= min_area:
                    processed_mask[labels == label] = 255

            return processed_mask

        except Exception as e:
            logger.error(f"Mask post-processing failed: {e}")
            return mask

    def keep_largest_component(self, mask: np.ndarray) -> np.ndarray:
        """
        Keep only the largest connected component.

        Args:
            mask: Input binary mask

        Returns:
            Mask (0/255) containing only the largest component; the input
            mask unchanged when empty or on failure.
        """
        try:
            num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask, 8)

            # Only background present: nothing to select.
            if num_labels <= 1:
                return mask

            # Largest foreground component (stats row 0 is the background).
            areas = stats[1:, cv2.CC_STAT_AREA]
            largest_label = 1 + np.argmax(areas)

            largest_mask = (labels == largest_label).astype(np.uint8) * 255

            return largest_mask

        except Exception as e:
            logger.error(f"Largest component extraction failed: {e}")
            return mask

    def validate_mask(self, mask: np.ndarray) -> bool:
        """
        Validate segmentation mask.

        A valid mask is a 2-D numpy array of dtype uint8 or bool with at
        least one foreground (non-zero) pixel.

        Args:
            mask: Mask to validate

        Returns:
            True if valid, False otherwise
        """
        if mask is None:
            return False

        if not isinstance(mask, np.ndarray):
            return False

        if mask.ndim != 2:
            return False

        if mask.dtype not in [np.uint8, np.bool_]:
            return False

        # An all-background mask is treated as invalid.
        if np.sum(mask > 0) == 0:
            logger.warning("Mask has no foreground pixels")
            return False

        return True

    def get_mask_properties(self, mask: np.ndarray) -> dict:
        """
        Get properties of the segmentation mask.

        Args:
            mask: Binary mask (uint8 0/255 or bool)

        Returns:
            Dictionary with keys area, perimeter, bbox_area, aspect_ratio,
            coverage, num_components; empty dict on invalid input or error.
            Perimeter and bounding box are computed from the largest
            external contour.
        """
        if not self.validate_mask(mask):
            return {}

        try:
            # Binarize. Bool masks must be handled explicitly: comparing a
            # bool array against 127 is always False, which previously made
            # every valid bool mask report zero area.
            if mask.dtype == np.bool_:
                binary_mask = mask.astype(np.uint8)
            else:
                binary_mask = (mask > 127).astype(np.uint8)

            area = np.sum(binary_mask)
            perimeter = 0.0

            contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if contours:
                # Use the largest contour; contours[0] is an arbitrary one
                # when several components exist.
                largest = max(contours, key=cv2.contourArea)
                perimeter = cv2.arcLength(largest, True)

                x, y, w, h = cv2.boundingRect(largest)
                bbox_area = w * h
                aspect_ratio = w / h if h > 0 else 0
            else:
                bbox_area = 0
                aspect_ratio = 0

            return {
                "area": int(area),
                "perimeter": float(perimeter),
                "bbox_area": int(bbox_area),
                "aspect_ratio": float(aspect_ratio),
                "coverage": float(area) / (mask.shape[0] * mask.shape[1]) if mask.size > 0 else 0.0,
                "num_components": len(contours)
            }

        except Exception as e:
            logger.error(f"Mask property calculation failed: {e}")
            return {}

    def create_overlay(self, image: np.ndarray, mask: np.ndarray,
                       color: Tuple[int, int, int] = (0, 255, 0),
                       alpha: float = 0.5) -> np.ndarray:
        """
        Create overlay of mask on image.

        Args:
            image: Base image
            mask: Binary mask (pixels equal to 255 are colored)
            color: Overlay color (B, G, R)
            alpha: Overlay transparency in [0, 1]

        Returns:
            Image with mask overlay; the unmodified image on failure.
        """
        try:
            overlay = image.copy()
            overlay[mask == 255] = color
            return cv2.addWeighted(image, 1.0 - alpha, overlay, alpha, 0)
        except Exception as e:
            logger.error(f"Overlay creation failed: {e}")
            return image