File size: 9,258 Bytes
b4123b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
"""
Configuration management for the Sorghum Pipeline.
This module handles all configuration settings, paths, and parameters
used throughout the pipeline.
"""
import os
import yaml
from pathlib import Path
from typing import Dict, Any, Optional
from dataclasses import dataclass, field
@dataclass
class Paths:
    """Filesystem locations used by the pipeline.

    ``input_folder`` and ``output_folder`` are required and are always
    converted to absolute paths. ``boundingbox_dir`` and ``labels_folder``
    are optional and are converted only when they hold a non-empty value.
    """

    input_folder: str
    output_folder: str
    boundingbox_dir: Optional[str] = None
    labels_folder: Optional[str] = None

    def __post_init__(self):
        """Rewrite every supplied path as an absolute path."""
        # Required folders are converted unconditionally, so an empty
        # string resolves to the current working directory.
        for required in ("input_folder", "output_folder"):
            setattr(self, required, os.path.abspath(getattr(self, required)))

        # Optional folders: None or "" is left exactly as it was given.
        for optional in ("boundingbox_dir", "labels_folder"):
            current = getattr(self, optional)
            if current:
                setattr(self, optional, os.path.abspath(current))
@dataclass
class ProcessingParams:
    """Tunable parameters for the image-processing stages.

    Every field has a default, so ``ProcessingParams()`` yields a complete
    parameter set. Fields are grouped by the stage they are named after.
    """

    # --- Image processing ---
    target_size: tuple = (1024, 1024)  # two-element size; order (w/h vs h/w) not shown here
    gaussian_blur_kernel: int = 5
    morphology_kernel_size: int = 7
    min_component_area: int = 1000

    # --- Segmentation ---
    segmentation_threshold: float = 0.5  # Config.validate() requires 0 <= value <= 1
    max_components: int = 10

    # --- Texture analysis ---
    lbp_points: int = 8
    lbp_radius: int = 1
    hog_orientations: int = 9
    hog_pixels_per_cell: tuple = (8, 8)
    hog_cells_per_block: tuple = (2, 2)
    lacunarity_window: int = 15
    ehd_threshold: float = 0.3
    angle_resolution: int = 45

    # --- Vegetation indices ---
    epsilon: float = 1e-10  # presumably a divide-by-zero guard -- confirm against callers
    soil_factor: float = 0.16

    # --- Morphology ---
    pixel_to_cm: float = 0.1099609375  # presumably cm per pixel -- confirm against callers
    # A factory is used so each instance gets its own list object.
    prune_sizes: list = field(
        default_factory=lambda: list((200, 100, 50, 30, 10))
    )
@dataclass
class OutputSettings:
    """Switches and naming options that control what the pipeline writes.

    All fields have defaults, so ``OutputSettings()`` is a usable
    configuration on its own.
    """

    # --- What to save ---
    save_images: bool = True
    save_plots: bool = True
    save_metadata: bool = True

    # --- Rendering options ---
    image_dpi: int = 150
    plot_dpi: int = 100
    image_format: str = "png"

    # --- Subdirectory names, one per kind of result ---
    segmentation_dir: str = "segmentation"
    features_dir: str = "features"
    texture_dir: str = "texture"
    morphology_dir: str = "morphology"
    vegetation_dir: str = "vegetation_indices"
    analysis_dir: str = "analysis"
@dataclass
class ModelSettings:
    """Options describing which ML model to load and where to run it.

    All fields have defaults, so ``ModelSettings()`` is a usable
    configuration on its own.
    """

    # One of "auto", "cpu" or "cuda"; Config.get_device() resolves "auto".
    device: str = "auto"
    model_name: str = "briaai/RMBG-2.0"
    batch_size: int = 1
    # NOTE(review): defaults to True. Presumably forwarded to the model
    # loader, which would then run code shipped with the model repository;
    # confirm this is intended.
    trust_remote_code: bool = True
    cache_dir: str = ""
    local_files_only: bool = False
class Config:
    """Main configuration class for the Sorghum Pipeline.

    Bundles four sections -- ``paths``, ``processing``, ``output`` and
    ``model`` -- and can read them from, and write them to, a YAML file.
    Files written by :meth:`save_to_file` can be read back by
    :meth:`load_from_file`.
    """

    # Keys written to each YAML section by save_to_file().
    _PATH_KEYS = (
        'input_folder', 'output_folder', 'boundingbox_dir', 'labels_folder',
    )
    _PROCESSING_KEYS = (
        'target_size', 'gaussian_blur_kernel', 'morphology_kernel_size',
        'min_component_area', 'segmentation_threshold', 'max_components',
        'lbp_points', 'lbp_radius', 'hog_orientations', 'hog_pixels_per_cell',
        'hog_cells_per_block', 'lacunarity_window', 'ehd_threshold',
        'angle_resolution', 'epsilon', 'soil_factor', 'pixel_to_cm',
        'prune_sizes',
    )
    _OUTPUT_KEYS = (
        'save_images', 'save_plots', 'save_metadata', 'image_dpi', 'plot_dpi',
        'image_format', 'segmentation_dir', 'features_dir', 'texture_dir',
        'morphology_dir', 'vegetation_dir', 'analysis_dir',
    )
    _MODEL_KEYS = (
        'device', 'model_name', 'batch_size', 'trust_remote_code',
        'cache_dir', 'local_files_only',
    )

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize configuration.

        Args:
            config_path: Path to YAML configuration file. If None, uses defaults.

        Raises:
            FileNotFoundError: If ``config_path`` is given but does not exist.
        """
        # NOTE: Paths makes the two required folders absolute, so these
        # empty placeholders resolve to the current working directory.
        self.paths = Paths(
            input_folder="",
            output_folder="",
            boundingbox_dir=""
        )
        self.processing = ProcessingParams()
        self.output = OutputSettings()
        self.model = ModelSettings()
        if config_path:
            self.load_from_file(config_path)

    @staticmethod
    def _update_section(target, values) -> None:
        """Copy recognised keys of a parsed YAML mapping onto ``target``.

        Keys that ``target`` does not already have are ignored, as are
        private/dunder names. A list supplied for an attribute that
        currently holds a tuple is converted to a tuple, because YAML has no
        tuple type and would otherwise silently change the field's type.

        Args:
            target: Settings object to update in place.
            values: Mapping for the section, or None/empty for "no changes".

        Raises:
            ValueError: If ``values`` is non-empty but not a mapping.
        """
        if not values:
            return
        if not isinstance(values, dict):
            raise ValueError(
                "Configuration section must be a mapping, "
                f"got {type(values).__name__}"
            )
        for key, value in values.items():
            if not isinstance(key, str) or key.startswith('_'):
                continue
            if not hasattr(target, key):
                continue
            if isinstance(getattr(target, key), tuple) and isinstance(value, list):
                value = tuple(value)
            setattr(target, key, value)

    @staticmethod
    def _yaml_safe(value):
        """Return ``value`` with tuples (recursively) replaced by lists.

        ``yaml.safe_load`` cannot read the ``!!python/tuple`` tag that the
        default dumper emits for tuples, so tuples are stored as plain
        sequences.
        """
        if isinstance(value, (tuple, list)):
            return [Config._yaml_safe(item) for item in value]
        return value

    def _as_dict(self) -> Dict[str, Any]:
        """Build the nested plain-data dictionary written by save_to_file()."""
        sections = (
            ('paths', self.paths, self._PATH_KEYS),
            ('processing', self.processing, self._PROCESSING_KEYS),
            ('output', self.output, self._OUTPUT_KEYS),
            ('model', self.model, self._MODEL_KEYS),
        )
        return {
            name: {key: self._yaml_safe(getattr(section, key)) for key in keys}
            for name, section, keys in sections
        }

    def load_from_file(self, config_path: str) -> None:
        """Load configuration from YAML file.

        A non-empty ``paths`` section replaces ``self.paths`` entirely; the
        ``processing``, ``output`` and ``model`` sections update only the
        attributes they mention. Missing or empty sections, and an empty
        file, leave the current settings untouched.

        Args:
            config_path: Path to the YAML file.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file or one of its sections is not a mapping.
            TypeError: If the ``paths`` section has unknown keys or lacks a
                required one (raised by the ``Paths`` constructor).
        """
        config_path = Path(config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_path}")

        with open(config_path, 'r', encoding='utf-8') as f:
            config_data = yaml.safe_load(f)

        # safe_load returns None for an empty document.
        if config_data is None:
            config_data = {}
        if not isinstance(config_data, dict):
            raise ValueError(
                f"Configuration file must contain a YAML mapping: {config_path}"
            )

        paths_section = config_data.get('paths')
        if paths_section:
            self.paths = Paths(**paths_section)

        for section_name in ('processing', 'output', 'model'):
            self._update_section(
                getattr(self, section_name), config_data.get(section_name)
            )

    def save_to_file(self, config_path: str) -> None:
        """Save current configuration to YAML file.

        Tuple-valued settings are written as plain YAML sequences so the
        result can be read back with :meth:`load_from_file`.

        Args:
            config_path: Destination file; overwritten if it exists.
        """
        with open(config_path, 'w', encoding='utf-8') as f:
            yaml.safe_dump(self._as_dict(), f, default_flow_style=False, indent=2)

    def get_device(self) -> str:
        """Get the appropriate device for processing.

        Returns:
            ``self.model.device`` unchanged, unless it is ``"auto"``, in
            which case ``"cuda"`` when torch reports CUDA as available and
            ``"cpu"`` otherwise.

        Raises:
            ImportError: If the device is ``"auto"`` and torch is not installed.
        """
        if self.model.device == "auto":
            # Imported lazily so torch is only needed for auto-detection.
            import torch
            return "cuda" if torch.cuda.is_available() else "cpu"
        return self.model.device

    def create_output_directories(self, base_path: str) -> None:
        """Ensure base output directory exists only.

        Subdirectories are created per plant in the output manager.

        Args:
            base_path: Directory to create, including missing parents.
        """
        base_path = Path(base_path)
        base_path.mkdir(parents=True, exist_ok=True)

    def validate(self) -> bool:
        """Validate configuration settings.

        Returns:
            True when every check passes.

        Raises:
            FileNotFoundError: If the input folder, or a configured bounding
                box directory, does not exist.
            ValueError: If the target size or segmentation threshold is
                out of range.
        """
        # Check if input directory exists
        if not os.path.exists(self.paths.input_folder):
            raise FileNotFoundError(f"Input folder does not exist: {self.paths.input_folder}")

        # Check if bounding box directory exists (optional)
        bbox_dir = getattr(self.paths, 'boundingbox_dir', None)
        if bbox_dir and not os.path.exists(bbox_dir):
            raise FileNotFoundError(f"Bounding box directory does not exist: {bbox_dir}")

        # Validate processing parameters
        size = self.processing.target_size
        if len(size) < 2:
            raise ValueError("Target size must have two dimensions")
        if size[0] <= 0 or size[1] <= 0:
            raise ValueError("Target size must be positive")

        # Written as a positive range test so that NaN is rejected as well.
        threshold = self.processing.segmentation_threshold
        if not (0 <= threshold <= 1):
            raise ValueError("Segmentation threshold must be between 0 and 1")
        return True
def create_default_config(output_path: str) -> None:
    """Write a starter configuration file and report where it was written.

    The file holds the default processing, output and model settings plus
    a fixed set of example folder names. ``Paths`` makes those names
    absolute, so they are resolved against the current working directory at
    the time of the call.

    Args:
        output_path: Destination of the YAML file.
    """
    default_paths = Paths(
        input_folder="Sorghum_dataset",
        output_folder="Sorghum_pipeline_Results",
        boundingbox_dir="boundingbox",
        labels_folder="labels",
    )

    config = Config()
    config.paths = default_paths
    config.save_to_file(output_path)

    print(f"Default configuration created at: {output_path}")
|