Spaces:

vertify
/

biomass-prediction-app

Running

App Files Files Community

pokkiri committed on May 19, 2025

Commit

7929990

verified ·

1 Parent(s): 8f42556

Update feature_engineering.py

Browse files

Files changed (1) hide show

feature_engineering.py +677 -328

feature_engineering.py CHANGED Viewed

@@ -1,360 +1,709 @@
-"""
-Feature engineering module for biomass prediction.
-This module generates the exact 99 features needed by the model.
 Author: najahpokkiri
-Date: 2025-05-17
 """
 import numpy as np
-from sklearn.preprocessing import StandardScaler
-from sklearn.decomposition import PCA
 import logging
 # Configure logger
 logger = logging.getLogger(__name__)
-def safe_divide(a, b, fill_value=0.0):
-    """Safe division that handles zeros in the denominator"""
-    a = np.asarray(a, dtype=np.float32)
-    b = np.asarray(b, dtype=np.float32)
-    # Handle NaN/Inf in inputs
-    a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
-    b = np.nan_to_num(b, nan=1e-10, posinf=1e10, neginf=-1e10)
-    mask = np.abs(b) < 1e-10
-    result = np.full_like(a, fill_value, dtype=np.float32)
-    if np.any(~mask):
-        result[~mask] = a[~mask] / b[~mask]
-    return np.nan_to_num(result, nan=fill_value, posinf=fill_value, neginf=fill_value)
-def calculate_spectral_indices(satellite_data):
-    """Calculate the 7 spectral indices needed by the model"""
-    indices = {}
-    # Use band indices based on position in the file
-    # Adjust these if your band order is different
-    blue = satellite_data[1] if satellite_data.shape[0] > 1 else None
-    green = satellite_data[2] if satellite_data.shape[0] > 2 else None
-    red = satellite_data[3] if satellite_data.shape[0] > 3 else None
-    nir = satellite_data[7] if satellite_data.shape[0] > 7 else None
-    swir1 = satellite_data[9] if satellite_data.shape[0] > 9 else None
-    swir2 = satellite_data[10] if satellite_data.shape[0] > 10 else None
-    # Calculate NDVI (Normalized Difference Vegetation Index)
-    if red is not None and nir is not None:
-        indices['NDVI'] = safe_divide(nir - red, nir + red)
-        # Calculate EVI (Enhanced Vegetation Index)
-        if blue is not None:
-            indices['EVI'] = 2.5 * safe_divide(nir - red, nir + 6.0 * red - 7.5 * blue + 1.0)
-        # Calculate SAVI (Soil Adjusted Vegetation Index)
-        indices['SAVI'] = 1.5 * safe_divide(nir - red, nir + red + 0.5)
-        # Calculate MSAVI2 (Modified Soil Adjusted Vegetation Index)
-        indices['MSAVI2'] = 0.5 * (2.0 * nir + 1.0 - np.sqrt((2.0 * nir + 1.0)**2 - 8.0 * (nir - red)))
-    # Calculate NDWI (Normalized Difference Water Index)
-    if green is not None and nir is not None:
-        indices['NDWI'] = safe_divide(green - nir, green + nir)
-    # Calculate NDMI (Normalized Difference Moisture Index)
-    if nir is not None and swir1 is not None:
-        indices['NDMI'] = safe_divide(nir - swir1, nir + swir1)
-    # Calculate NBR (Normalized Burn Ratio)
-    if nir is not None and swir2 is not None:
-        indices['NBR'] = safe_divide(nir - swir2, nir + swir2)
-    # Ensure we have all required indices by providing defaults if calculation failed
-    required_indices = ['NDVI', 'EVI', 'SAVI', 'MSAVI2', 'NDWI', 'NDMI', 'NBR']
-    for idx in required_indices:
-        if idx not in indices:
-            logger.warning(f"Could not calculate {idx}, using zeros instead")
-            indices[idx] = np.zeros_like(satellite_data[0])
-    return indices
-def extract_texture_features(satellite_data):
-    """Extract the 5 texture features needed by the model"""
-    texture_features = {}
-    height, width = satellite_data.shape[1], satellite_data.shape[2]
-    # Use band 7 (NIR) for texture features
-    b7_idx = 7
-    if satellite_data.shape[0] <= b7_idx:
-        logger.warning(f"Band 7 not available for texture features. Using band 0 instead.")
-        b7_idx = 0
-    band = satellite_data[b7_idx].copy()
-    band = np.nan_to_num(band, nan=0.0)
-    try:
-        # Import skimage for texture features
-        try:
-            from skimage.filters import sobel
-            from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
-        except ImportError:
-            logger.warning("scikit-image not found. Using placeholder texture features.")
-            # Provide placeholder features
-            texture_features['Sobel_B7'] = np.zeros_like(band)
-            texture_features['LBP_B7'] = np.zeros_like(band)
-            texture_features['GLCM_contrast_B7'] = np.zeros_like(band)
-            texture_features['GLCM_dissimilarity_B7'] = np.zeros_like(band)
-            texture_features['GLCM_homogeneity_B7'] = np.zeros_like(band)
-            texture_features['GLCM_energy_B7'] = np.zeros_like(band)
-            return texture_features
-        # 1. Sobel filter for edge detection
-        sobel_filtered = sobel(band)
-        texture_features['Sobel_B7'] = sobel_filtered
-        # 2. Local Binary Pattern
-        # Normalize band to 0-255 range for LBP
-        band_norm = band.copy()
-        if np.any(~np.isnan(band)):
-            band_min, band_max = np.nanpercentile(band, [1, 99])
-            if band_max > band_min:
-                band_norm = np.clip((band - band_min) / (band_max - band_min + 1e-8) * 255, 0, 255).astype(np.uint8)
-        else:
-            band_norm = np.zeros_like(band, dtype=np.uint8)
-        # Calculate LBP
-        lbp = local_binary_pattern(band_norm, 8, 1, method='uniform')
-        texture_features['LBP_B7'] = lbp
-        # 3. GLCM properties
-        # Create sample patch for GLCM calculation
-        sample_size = min(128, height, width)
-        center_y, center_x = height // 2, width // 2
-        offset = sample_size // 2
-        y_start = max(0, center_y - offset)
-        y_end = min(height, center_y + offset)
-        x_start = max(0, center_x - offset)
-        x_end = min(width, center_x + offset)
-        patch = band_norm[y_start:y_end, x_start:x_end]
-        # Calculate GLCM properties if patch is valid
-        if patch.size > 0:
-            glcm = graycomatrix(patch, [1], [0], levels=256, symmetric=True, normed=True)
-            for prop in ['contrast', 'dissimilarity', 'homogeneity', 'energy']:
-                try:
-                    value = float(graycoprops(glcm, prop)[0, 0])
-                    texture_features[f'GLCM_{prop}_B7'] = np.full_like(band, value)
-                except:
-                    texture_features[f'GLCM_{prop}_B7'] = np.zeros_like(band)
-        else:
-            # Create placeholder GLCM features if patch is invalid
-            for prop in ['contrast', 'dissimilarity', 'homogeneity', 'energy']:
-                texture_features[f'GLCM_{prop}_B7'] = np.zeros_like(band)
-    except Exception as e:
-        logger.error(f"Error in texture feature extraction: {e}")
-        # Provide placeholder features in case of error
-        texture_features['Sobel_B7'] = np.zeros_like(band)
-        texture_features['LBP_B7'] = np.zeros_like(band)
-        texture_features['GLCM_contrast_B7'] = np.zeros_like(band)
-        texture_features['GLCM_dissimilarity_B7'] = np.zeros_like(band)
-        texture_features['GLCM_homogeneity_B7'] = np.zeros_like(band)
-        texture_features['GLCM_energy_B7'] = np.zeros_like(band)
-    return texture_features
-def calculate_spatial_features(satellite_data, indices):
-    """Calculate the 2 spatial features needed by the model"""
-    spatial_features = {}
-    # 1. Gradient of Band 7 (NIR)
-    b7_idx = 7
-    if satellite_data.shape[0] <= b7_idx:
-        logger.warning(f"Band 7 not available for gradient calculation. Using band 0 instead.")
-        b7_idx = 0
-    band = satellite_data[b7_idx].copy()
-    band = np.nan_to_num(band, nan=0.0)
-    try:
-        # Calculate the gradient magnitude
-        grad_y, grad_x = np.gradient(band)
-        grad_magnitude = np.sqrt(grad_x**2 + grad_y**2)
-        spatial_features['Gradient_B7'] = grad_magnitude
-    except Exception as e:
-        logger.warning(f"Error calculating band gradient: {e}")
-        spatial_features['Gradient_B7'] = np.zeros_like(band)
-    # 2. NDVI gradient
-    try:
-        ndvi = indices.get('NDVI', np.zeros_like(band))
-        ndvi = np.nan_to_num(ndvi, nan=0.0)
-        # Calculate the gradient magnitude for NDVI
-        grad_y, grad_x = np.gradient(ndvi)
-        grad_magnitude = np.sqrt(grad_x**2 + grad_y**2)
-        spatial_features['NDVI_gradient'] = grad_magnitude
-    except Exception as e:
-        logger.warning(f"Error calculating NDVI gradient: {e}")
-        spatial_features['NDVI_gradient'] = np.zeros_like(band)
-    return spatial_features
-def calculate_pca_features(satellite_data, n_components=25):
-    """Calculate the 25 PCA components needed by the model"""
-    pca_features = {}
-    # Set a fixed number of components
-    n_components = 25  # Always use exactly 25 components
-    try:
-        # Reshape to (bands, pixels)
-        n_bands, height, width = satellite_data.shape
-        bands_flat = satellite_data.reshape(n_bands, -1).T
-        # Handle NaN values
-        valid_mask = ~np.any(np.isnan(bands_flat), axis=1)
-        if not np.any(valid_mask):
-            logger.warning("No valid pixels found for PCA calculation")
-            # Create placeholder PCA features
-            for i in range(1, n_components + 1):
-                pca_features[f'PCA_{i:02d}'] = np.zeros((height, width), dtype=np.float32)
-            return pca_features
-        bands_valid = bands_flat[valid_mask]
-        # Standardize valid data
-        scaler = StandardScaler()
-        bands_scaled = scaler.fit_transform(bands_valid)
-        # Calculate PCA
-        pca = PCA(n_components=min(n_components, bands_scaled.shape[1], bands_scaled.shape[0]))
-        pca_result = pca.fit_transform(bands_scaled)
-        # Extend to full 25 components if needed
-        actual_components = pca_result.shape[1]
-        if actual_components < n_components:
-            logger.warning(f"Only {actual_components} PCA components calculated, padding to {n_components}")
-            padding = np.zeros((pca_result.shape[0], n_components - actual_components))
-            pca_result = np.hstack([pca_result, padding])
-        # Map back to original pixels
-        pca_all = np.zeros((bands_flat.shape[0], n_components))
-        pca_all[valid_mask] = pca_result
-        # Reshape to spatial dimensions
-        pca_spatial = pca_all.reshape(height, width, n_components)
-        # Store each component with the correct naming
-        for i in range(1, n_components + 1):
-            pca_features[f'PCA_{i:02d}'] = pca_spatial[:, :, i-1]
     except Exception as e:
-        logger.error(f"Error calculating PCA features: {e}")
-        # Create placeholder PCA features
-        for i in range(1, n_components + 1):
-            pca_features[f'PCA_{i:02d}'] = np.zeros((height, width), dtype=np.float32)
-    return pca_features
-def extract_all_features(satellite_data):
-    """
-    Extract exactly 99 features needed by the model:
-    - 59 original bands
-    - 7 spectral indices
-    - 5 texture features
-    - 2 spatial features
-    - 25 PCA components
-    Parameters:
-        satellite_data (ndarray): Array of shape (bands, height, width)
-    Returns:
-        features_array (ndarray): Array of shape (valid_pixels, 99)
-        valid_mask (ndarray): Boolean mask of valid pixels
-        feature_names (list): List of 99 feature names
-    """
-    logger.info("Extracting features for biomass prediction...")
-    height, width = satellite_data.shape[1], satellite_data.shape[2]
-    # Create valid pixel mask (no NaN or Inf values)
-    valid_mask = np.all(np.isfinite(satellite_data), axis=0)
-    valid_y, valid_x = np.where(valid_mask)
-    n_valid = len(valid_y)
-    logger.info(f"Found {n_valid} valid pixels out of {height*width}")
-    # Generate all feature categories
-    logger.info("Calculating spectral indices...")
-    indices = calculate_spectral_indices(satellite_data)
-    logger.info("Extracting texture features...")
-    texture_features = extract_texture_features(satellite_data)
-    logger.info("Calculating spatial features...")
-    spatial_features = calculate_spatial_features(satellite_data, indices)
-    logger.info("Computing PCA components...")
-    pca_features = calculate_pca_features(satellite_data)
-    # Define the ordered list of feature names
-    feature_names = []
-    # 1. Add original band names (Band_01 through Band_59)
-    for i in range(1, 60):
-        feature_names.append(f'Band_{i:02d}')
-    # 2. Add spectral indices
-    spectral_indices = ['NDVI', 'EVI', 'SAVI', 'MSAVI2', 'NDWI', 'NDMI', 'NBR']
-    feature_names.extend(spectral_indices)
-    # 3. Add texture features
-    texture_names = ['Sobel_B7', 'LBP_B7', 'GLCM_contrast_B7', 'GLCM_dissimilarity_B7',
-                    'GLCM_homogeneity_B7', 'GLCM_energy_B7']
-    feature_names.extend(texture_names)
-    # 4. Add spatial features
-    spatial_names = ['Gradient_B7', 'NDVI_gradient']
-    feature_names.extend(spatial_names)
-    # 5. Add PCA components
-    for i in range(1, 26):
-        feature_names.append(f'PCA_{i:02d}')
-    # Create feature dictionary with all features
-    all_features = {}
-    # 1. Original bands
-    for i in range(min(satellite_data.shape[0], 59)):
-        all_features[f'Band_{i+1:02d}'] = satellite_data[i]
-    # Pad with zeros if we have fewer than 59 bands
-    for i in range(satellite_data.shape[0], 59):
-        all_features[f'Band_{i+1:02d}'] = np.zeros((height, width), dtype=np.float32)
-    # 2. Add other feature categories
-    all_features.update(indices)
-    all_features.update(texture_features)
-    all_features.update(spatial_features)
-    all_features.update(pca_features)
-    # Verify we have exactly 99 features
-    assert len(feature_names) == 99, f"Expected 99 features, but got {len(feature_names)}"
-    # Extract feature values for valid pixels
-    feature_matrix = np.zeros((n_valid, len(feature_names)), dtype=np.float32)
-    for i, name in enumerate(feature_names):
-        if name in all_features:
-            feature_data = all_features[name]
-            if feature_data.ndim == 2:
-                feature_values = feature_data[valid_y, valid_x]
-            else:
-                feature_values = np.full(n_valid, feature_data)
-            feature_matrix[:, i] = np.nan_to_num(feature_values, nan=0.0)
-        else:
-            logger.warning(f"Feature '{name}' not found, using zeros")
-            feature_matrix[:, i] = 0.0
-    logger.info(f"Successfully extracted {len(feature_names)} features for {n_valid} pixels")
-    return feature_matrix, valid_mask, feature_names

+def create_interface(self):
+        """Build the Gradio Blocks interface with sample-image thumbnails.
+
+        Reads ``self.sample_images`` (name -> path) and ``self.model``, renders
+        thumbnails through ``self.create_thumbnail`` and wires the buttons to
+        ``self.predict_biomass``.
+
+        Returns:
+            The constructed ``gr.Blocks`` object; it is not launched here.
+        """
+        # NOTE(review): this function takes ``self`` but sits at module level in
+        # this revision, ahead of the imports it relies on (os, gr, Image).
+        # Presumably a BiomassPredictorApp method pasted out of place - confirm.
+        def display_label(sample_name):
+            # "input_chip_1" -> "Input Chip 1"
+            return sample_name.replace("input_", "Input ").replace("chip_", "Chip ")
+        # Generate thumbnails for the sample images that exist on disk
+        sample_thumbnails = {}
+        for name, path in self.sample_images.items():
+            if not os.path.exists(path):
+                logger.warning(f"Sample image not found: {path}")
+                continue
+            thumbnail = self.create_thumbnail(path)
+            # create_thumbnail returns None on failure; skip those samples
+            if thumbnail is not None:
+                sample_thumbnails[name] = Image.open(thumbnail)
+        with gr.Blocks(title="Biomass Prediction Model") as interface:
+            gr.Markdown("# Above-Ground Biomass Prediction")
+            gr.Markdown("""
+            Upload a multi-band satellite image to predict above-ground biomass (AGB) across the landscape.
+            **Requirements:**
+            - Image must be a GeoTIFF with spectral bands
+            - For best results, image should contain at least 3 bands
+            """)
+            # gr.Warning only shows a modal when called inside an event handler;
+            # while the layout is being built it never reaches the user, so the
+            # load failure is rendered as a visible banner instead.
+            if self.model is None:
+                gr.Markdown("⚠️ Model failed to load. The app may not work correctly. Check logs for details.")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    input_image = gr.File(
+                        label="Upload Satellite Image (GeoTIFF)",
+                        file_types=[".tif", ".tiff"]
+                    )
+                    # Sample images section
+                    gr.Markdown("### Sample Images")
+                    # (button, sample name) pairs, wired to callbacks further down
+                    sample_buttons = []
+                    # First row - sample thumbnails side by side horizontally
+                    with gr.Row():
+                        for name, thumbnail in sample_thumbnails.items():
+                            with gr.Column():
+                                gr.Image(
+                                    value=thumbnail,
+                                    label=display_label(name),
+                                    show_download_button=False,
+                                    height=180
+                                )
+                    # Second row - one button per thumbnail, in the same order
+                    with gr.Row():
+                        for name in sample_thumbnails:
+                            with gr.Column():
+                                sample_btn = gr.Button(
+                                    f"Use {display_label(name)}",
+                                    variant="secondary",
+                                    size="lg"
+                                )
+                                sample_buttons.append((sample_btn, name))
+                    # Generate button at the bottom
+                    generate_btn = gr.Button("Generate Biomass Prediction", variant="primary", size="lg")
+                with gr.Column(scale=2):
+                    output_image = gr.Image(
+                        label="Biomass Prediction Map",
+                        type="pil"
+                    )
+                    output_stats = gr.Markdown(
+                        label="Statistics"
+                    )
+            with gr.Accordion("About", open=False):
+                gr.Markdown("""
+                ## About This Model
+                This biomass prediction model uses the StableResNet architecture to predict above-ground biomass from satellite imagery.
+                ### Model Details
+                - Architecture: StableResNet
+                - Input: Multi-spectral satellite imagery
+                - Output: Above-ground biomass (Mg/ha)
+                - Creator: vertify.earth
+                - Date: 2025-05-19
+                ### Improvements in This Version
+                - Added calibration factor to match full-tile inference values
+                - Improved chunk processing with overlap to reduce edge artifacts
+                - Enhanced feature calculation for better results
+                - Optimized visualization to show the full range of biomass values
+                """)
+            # Connect the process button
+            generate_btn.click(
+                fn=self.predict_biomass,
+                inputs=[input_image],
+                outputs=[output_image, output_stats]
+            )
+            # Connect the sample buttons; the path is bound as a default argument
+            # so each lambda keeps its own sample rather than the loop's last one
+            for button, name in sample_buttons:
+                button.click(
+                    fn=lambda path=self.sample_images[name]: self.predict_biomass(path),
+                    inputs=[],
+                    outputs=[output_image, output_stats]
+                )
+        return interface
+def launch_app():
+    """Create the app, build its interface and launch it.
+
+    Any exception raised while constructing or launching is logged together
+    with its traceback and is not re-raised.
+    """
+    # Imported locally: traceback is not among the module-level imports visible
+    # in this file, so the except handler below would otherwise fail with a
+    # NameError and hide the original error.
+    import traceback
+    try:
+        # Create app instance
+        app = BiomassPredictorApp()
+        # Create interface
+        interface = app.create_interface()
+        # Launch interface
+        interface.launch()
+    except Exception as e:
+        logger.error(f"Error launching app: {e}")
+        logger.error(traceback.format_exc())
+# NOTE(review): this guard runs before BiomassPredictorApp is defined further
+# down, so launch_app() can only log a NameError here; it presumably belongs at
+# the end of the file - confirm against the full module.
+if __name__ == "__main__":
+    launch_app()
+"""
+Biomass Prediction Gradio App with Two Sample Images and RGB Comparison
 Author: najahpokkiri
+Date: 2025-05-19
+Updated with sample image thumbnails and always-on RGB comparison.
 """
+import os
+import sys
+import torch
 import numpy as np
+import gradio as gr
+import joblib
+import tempfile
+import matplotlib.pyplot as plt
+import matplotlib.colors as colors
+from PIL import Image
+import io
 import logging
+from huggingface_hub import hf_hub_download
+import rasterio
 # Configure logger
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
+# Import model architecture
+from model import StableResNet
+# Define a placeholder for feature engineering if not available
+def extract_all_features(image, n_features=99):
+    """Build a placeholder per-pixel feature matrix from raw bands.
+
+    Placeholder function - in production, use the actual feature_engineering
+    module. No indices or texture features are derived: the first
+    ``min(n_bands, n_features)`` band values are copied and the remaining
+    columns stay zero.
+
+    Parameters:
+        image (ndarray): Array of shape (bands, height, width).
+        n_features (int): Width of the returned matrix (default 99).
+
+    Returns:
+        feature_matrix (ndarray): float32 array of shape (n_valid, n_features),
+            rows ordered like ``np.where(valid_mask)`` (row-major).
+        valid_mask (ndarray): Boolean (height, width) mask, True where every
+            band is finite.
+        generated_features (list): Names "Band_1" .. "Band_<n_features>".
+
+    Raises:
+        ValueError: If ``image`` is not 3-dimensional.
+    """
+    image = np.asarray(image)
+    if image.ndim != 3:
+        raise ValueError(
+            f"Expected image of shape (bands, height, width), got shape {image.shape}"
+        )
+    n_bands = image.shape[0]
+    # A pixel is valid only when it is finite in every band
+    valid_mask = np.all(np.isfinite(image), axis=0)
+    n_valid = int(valid_mask.sum())
+    feature_matrix = np.zeros((n_valid, n_features), dtype=np.float32)
+    # Boolean indexing walks the mask in row-major order, the same order
+    # np.where(valid_mask) yields, so one vectorized copy replaces the former
+    # per-pixel / per-band Python loop.
+    n_copied = min(n_bands, n_features)
+    feature_matrix[:, :n_copied] = image[:n_copied][:, valid_mask].T
+    # Create feature names
+    generated_features = [f"Band_{i+1}" for i in range(n_features)]
+    return feature_matrix, valid_mask, generated_features
+class BiomassPredictorApp:
+    """Gradio app for biomass prediction from satellite imagery"""
+    def __init__(self, model_repo="pokkiri/biomass-model"):
+        """Set up default state, then load the model from ``model_repo``.
+
+        Parameters:
+            model_repo (str): Repository id handed to ``load_model`` for download.
+        """
+        self.model_repo = model_repo
+        # Filled in by load_model(); they keep these defaults if loading fails early
+        self.model, self.package = None, None
+        self.feature_names = []
+        # Prefer the GPU when torch reports one
+        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.device = torch.device(device_name)
+        # Sample name -> GeoTIFF path offered in the UI
+        self.sample_images = {
+            "input_chip_1": "input_chip_1.tif",
+            "input_chip_2": "input_chip_2.tif"
+        }
+        # Temporary upload copies, removed later by cleanup()
+        self.temp_files = []
+        self.load_model()
+    def load_model(self):
+        """Load the network weights and the preprocessing package.
+
+        Files are fetched with ``hf_hub_download`` from ``self.model_repo``; if
+        that fails, local "model.pt" / "model_package.pkl" are used. If the
+        package cannot be read, a minimal default package (99 features, log
+        transform, no scaler) is substituted.
+
+        Returns:
+            bool: True when the model was loaded. False on any error, in which
+            case ``self.model`` is left as None so callers can detect the failure.
+        """
+        try:
+            logger.info(f"Loading model from {self.model_repo}")
+            # Download model files from HuggingFace or use local files
+            try:
+                model_path = hf_hub_download(repo_id=self.model_repo, filename="model.pt")
+                package_path = hf_hub_download(repo_id=self.model_repo, filename="model_package.pkl")
+            except Exception as e:
+                logger.warning(f"Failed to download from HuggingFace: {e}")
+                # Fallback to local files
+                model_path = "model.pt"
+                package_path = "model_package.pkl"
+            # Try to load package with metadata
+            try:
+                logger.info(f"Loading package from {package_path}")
+                # SECURITY NOTE(review): joblib.load unpickles the file and can run
+                # arbitrary code; only point model_repo at a trusted repository.
+                self.package = joblib.load(package_path)
+                logger.info("Successfully loaded model package")
+                # Extract information from package
+                # (assumes a dict-like package with an 'n_features' key - TODO confirm)
+                n_features = self.package['n_features']
+                self.feature_names = self.package.get('feature_names', [f"feature_{i}" for i in range(n_features)])
+                logger.info(f"Package keys: {list(self.package.keys())}")
+                logger.info(f"Model expects {n_features} features")
+            except Exception as e:
+                logger.error(f"Error loading package file: {e}")
+                # Fallback to default values
+                n_features = 99  # We know there are 99 features
+                self.feature_names = [f"feature_{i}" for i in range(n_features)]
+                # Create a minimal package with essential components
+                self.package = {
+                    'n_features': n_features,
+                    'use_log_transform': True,
+                    'epsilon': 1.0,
+                    'scaler': None  # Will handle the None case in prediction
+                }
+            # Build into a local first: publishing the network on self before its
+            # weights are loaded would leave a randomly initialised model behind
+            # if torch.load / load_state_dict raised.
+            model = StableResNet(n_features=n_features)
+            # SECURITY NOTE(review): torch.load may unpickle too, depending on the
+            # torch version's weights_only default - same trust caveat as above.
+            model.load_state_dict(torch.load(model_path, map_location=self.device))
+            model.to(self.device)
+            model.eval()
+            self.model = model
+            logger.info("Model loaded successfully")
+            logger.info(f"Number of features: {n_features}")
+            logger.info(f"Using device: {self.device}")
+            return True
+        except Exception as e:
+            # Never leave a half-initialised model where predict_biomass can use it
+            self.model = None
+            logger.error(f"Error loading model: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return False
+    def cleanup(self):
+        """Delete every file listed in ``self.temp_files``, then reset the list.
+
+        Removal is best-effort: a failure is logged as a warning and the
+        remaining paths are still processed.
+        """
+        for tmp_path in self.temp_files:
+            try:
+                # Nothing to do for paths that are already gone
+                if not os.path.exists(tmp_path):
+                    continue
+                os.unlink(tmp_path)
+            except Exception as e:
+                logger.warning(f"Failed to remove temporary file {tmp_path}: {e}")
+        self.temp_files = []
+    def create_thumbnail(self, image_path, max_size=(200, 200), output_format="PNG"):
+        """Render a small preview image from a GeoTIFF.
+
+        Rasters with at least three bands use bands 1-3 as RGB (negative values
+        clipped to 0 first); otherwise band 1 is rendered as grayscale. Each
+        band is stretched between its 2nd and 98th percentile.
+
+        Parameters:
+            image_path (str): Path of the raster to preview.
+            max_size (tuple): Maximum (width, height) passed to ``Image.thumbnail``.
+            output_format (str): Format name passed to ``Image.save``.
+
+        Returns:
+            io.BytesIO positioned at the start of the encoded image, or None if
+            the file is missing or any step fails (the error is logged).
+        """
+        def stretch_to_uint8(band):
+            # Work in float64 on a copy: normalising in place inside the raster's
+            # own dtype can wrap for narrow integer types.
+            band = np.asarray(band, dtype=np.float64)
+            finite = np.isfinite(band)
+            if not finite.any():
+                return np.zeros(band.shape, dtype=np.uint8)
+            # Percentiles over finite pixels only; a single NaN would otherwise
+            # turn both percentiles, and then the whole band, into NaN.
+            p2, p98 = np.percentile(band[finite], [2, 98])
+            band = np.where(finite, band, 0.0)
+            if p98 > p2:
+                scaled = (band - p2) / (p98 - p2) * 255
+            else:
+                # Flat band: scale by the maximum, guarding against division by 0
+                scaled = band / (band.max() or 1) * 255
+            return np.clip(scaled, 0, 255).astype(np.uint8)
+        try:
+            if not os.path.exists(image_path):
+                logger.warning(f"Image file not found: {image_path}")
+                return None
+            # Open the GeoTIFF
+            with rasterio.open(image_path) as src:
+                if src.count >= 3:
+                    # Use first three bands as RGB, shape (bands, height, width)
+                    rgb_data = src.read([1, 2, 3])
+                    channels = [
+                        stretch_to_uint8(np.clip(np.asarray(band, dtype=np.float64), 0, None))
+                        for band in rgb_data
+                    ]
+                    # Stack to (height, width, 3) as PIL expects
+                    img = Image.fromarray(np.dstack(channels))
+                else:
+                    # Use first band as grayscale
+                    gray_data = stretch_to_uint8(src.read(1))
+                    img = Image.fromarray(gray_data, mode='L')
+            # Resize to thumbnail (in place, keeps aspect ratio)
+            img.thumbnail(max_size)
+            # Save to bytes buffer and rewind so callers can read it directly
+            buf = io.BytesIO()
+            img.save(buf, format=output_format)
+            buf.seek(0)
+            return buf
+        except Exception as e:
+            logger.error(f"Error creating thumbnail: {e}")
+            return None
+    def predict_biomass(self, image_file):
+        """Predict biomass from a satellite image with RGB comparison"""
+        if self.model is None:
+            return None, "Error: Model not loaded. Please check logs for details."
+        if image_file is None:
+            return None, "Error: No file uploaded. Please upload a GeoTIFF file or use one of the sample images."
+        try:
+            # Check if we're using a sample image (string path) or an uploaded file
+            if isinstance(image_file, str):
+                logger.info(f"Using sample image: {image_file}")
+                tmp_path = image_file  # Use the sample path directly
+                cleanup_tmp = False  # Don't delete the sample file
+            else:
+                # Create a temporary file to save the uploaded file
+                with tempfile.NamedTemporaryFile(suffix='.tif', delete=False) as tmp_file:
+                    tmp_path = tmp_file.name
+                    with open(image_file.name, 'rb') as f:
+                        tmp_file.write(f.read())
+                # Add to list for cleanup later
+                self.temp_files.append(tmp_path)
+                cleanup_tmp = True
+            # Open the image file
+            with rasterio.open(tmp_path) as src:
+                image = src.read()
+                height, width = image.shape[1], image.shape[2]
+                transform = src.transform
+                crs = src.crs
+                logger.info(f"Processing image: {height}x{width} pixels, {image.shape[0]} bands")
+                # Validate minimum band count
+                if image.shape[0] < 3:
+                    return None, f"Error: Image has only {image.shape[0]} bands. At least 3 bands are required for RGB visualization."
+                # Generate all features using feature engineering
+                logger.info("Generating all 99 features from bands...")
+                feature_matrix, valid_mask, generated_features = extract_all_features(image)
+                # Verify we have exactly 99 features
+                if feature_matrix.shape[1] != 99:
+                    logger.error(f"Error: Generated {feature_matrix.shape[1]} features, but model expects 99.")
+                    return None, f"Error: Generated {feature_matrix.shape[1]} features, but model expects 99."
+                # Apply feature scaling if available
+                try:
+                    if 'scaler' in self.package and self.package['scaler'] is not None:
+                        logger.info("Applying feature scaling...")
+                        feature_matrix = self.package['scaler'].transform(feature_matrix)
+                except Exception as e:
+                    logger.warning(f"Error applying scaler: {e}. Using original features.")
+                # Initialize predictions array
+                predictions = np.zeros((height, width), dtype=np.float32)
+                # Get valid pixel coordinates
+                valid_y, valid_x = np.where(valid_mask)
+                # Make predictions
+                logger.info(f"Running model inference on {len(valid_y)} valid pixels...")
+                with torch.no_grad():
+                    # Process in batches to avoid memory issues
+                    batch_size = 10000
+                    for i in range(0, len(valid_y), batch_size):
+                        end_idx = min(i + batch_size, len(valid_y))
+                        batch = feature_matrix[i:end_idx]
+                        # Convert to tensor
+                        batch_tensor = torch.tensor(batch, dtype=torch.float32).to(self.device)
+                        # Get predictions
+                        batch_predictions = self.model(batch_tensor).cpu().numpy()
+                        # Handle scalar case for single-item batches
+                        if batch_predictions.ndim == 0:
+                            batch_predictions = np.array([batch_predictions])
+                        # Convert from log scale if needed
+                        if self.package.get('use_log_transform', True):
+                            epsilon = self.package.get('epsilon', 1.0)
+                            batch_predictions = np.exp(batch_predictions) - epsilon
+                            batch_predictions = np.maximum(batch_predictions, 0)  # Ensure non-negative
+                        # Map predictions back to image
+                        for j, pred in enumerate(batch_predictions):
+                            y_idx = valid_y[i + j]
+                            x_idx = valid_x[i + j]
+                            predictions[y_idx, x_idx] = pred
+                        # Log progress
+                        if (i // batch_size) % 5 == 0 or end_idx == len(valid_y):
+                            logger.info(f"Processed {end_idx}/{len(valid_y)} pixels")
+                # Create visualization - always RGB+Biomass side-by-side
+                logger.info("Creating RGB + Biomass visualization...")
+                # Create side-by-side comparison (RGB and Biomass)
+                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
+                # Prepare RGB image - try different band combinations if needed
+                rgb_bands = [3, 2, 1]  # Common RGB combination (R,G,B)
+                # Check if we have enough bands for RGB
+                if image.shape[0] < 3:
+                    logger.warning(f"Image has only {image.shape[0]} bands, using available bands for display")
+                    rgb_bands = list(range(min(3, image.shape[0])))
+                    while len(rgb_bands) < 3:
+                        rgb_bands.append(0)  # Pad with zeros if needed
+                # Create RGB image
+                rgb = np.zeros((height, width, 3), dtype=np.float32)
+                for i, band_idx in enumerate(rgb_bands):
+                    if band_idx < image.shape[0]:
+                        rgb[:, :, i] = image[band_idx]
+                # Handle potential NaN values
+                rgb = np.nan_to_num(rgb)
+                # Enhance contrast with percentile-based normalization
+                for i in range(3):
+                    p2 = np.percentile(rgb[:,:,i], 2)
+                    p98 = np.percentile(rgb[:,:,i], 98)
+                    if p98 > p2:
+                        rgb[:,:,i] = np.clip((rgb[:,:,i] - p2) / (p98 - p2), 0, 1)
+                # Display RGB image
+                ax1.imshow(rgb)
+                ax1.set_title('RGB Image')
+                ax1.axis('off')
+                # Display biomass prediction
+                masked_predictions = np.ma.masked_where(~valid_mask, predictions)
+                vmin = np.percentile(predictions[valid_mask], 1)
+                vmax = np.percentile(predictions[valid_mask], 99)
+                im = ax2.imshow(masked_predictions, cmap='viridis', vmin=vmin, vmax=vmax)
+                fig.colorbar(im, ax=ax2, label='Biomass (Mg/ha)')
+                ax2.set_title('Predicted Biomass')
+                ax2.axis('off')
+                # Add super title
+                plt.suptitle('RGB Image and Biomass Prediction', fontsize=16)
+                plt.tight_layout()
+                # Save figure to bytes buffer
+                buf = io.BytesIO()
+                fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
+                buf.seek(0)
+                plt.close(fig)
+                # Calculate summary statistics
+                valid_predictions = predictions[valid_mask]
+                stats = {
+                    'Mean Biomass': f"{np.mean(valid_predictions):.2f} Mg/ha",
+                    'Median Biomass': f"{np.median(valid_predictions):.2f} Mg/ha",
+                    'Min Biomass': f"{np.min(valid_predictions):.2f} Mg/ha",
+                    'Max Biomass': f"{np.max(valid_predictions):.2f} Mg/ha"
+                }
+                # Add area and total biomass if transform is available
+                if transform is not None:
+                    pixel_area_m2 = abs(transform[0] * transform[4])  # Assuming square pixels
+                    total_biomass = np.sum(valid_predictions) * (pixel_area_m2 / 10000)  # Convert to hectares
+                    area_hectares = np.sum(valid_mask) * (pixel_area_m2 / 10000)
+                    stats['Total Biomass'] = f"{total_biomass:.2f} Mg"
+                    stats['Area'] = f"{area_hectares:.2f} hectares"
+                # Format statistics as markdown
+                stats_md = "### Biomass Statistics\n\n"
+                stats_md += "| Metric | Value |\n|--------|-------|\n"
+                for k, v in stats.items():
+                    stats_md += f"| {k} | {v} |\n"
+                # Add processing info
+                stats_md += f"\n\n*Processed {np.sum(valid_mask):,} valid pixels with {feature_matrix.shape[1]} features*"
+                # Cleanup temporary files if needed
+                if cleanup_tmp:
+                    self.cleanup()
+                # Return visualization and statistics
+                return Image.open(buf), stats_md
+        except Exception as e:
+            # Ensure cleanup even on error
+            self.cleanup()
+            import traceback
+            logger.error(f"Error predicting biomass: {e}")
+            logger.error(traceback.format_exc())
+            return None, f"Error predicting biomass: {str(e)}\n\nPlease check logs for details."
+    def create_interface(self):
+        """Create Gradio interface with sample image thumbnails"""
+        # Generate thumbnails for sample images
+        sample_thumbnails = {}
+        for name, path in self.sample_images.items():
+            if os.path.exists(path):
+                thumbnail = self.create_thumbnail(path)
+                if thumbnail:
+                    sample_thumbnails[name] = Image.open(thumbnail)
+            else:
+                logger.warning(f"Sample image not found: {path}")
+        with gr.Blocks(title="Biomass Prediction Model") as interface:
+            gr.Markdown("# Above-Ground Biomass Prediction")
+            gr.Markdown("""
+            Upload a multi-band satellite image to predict above-ground biomass (AGB) across the landscape.
+            **Requirements:**
+            - Image must be a GeoTIFF with spectral bands
+            - For best results, image should contain at least 3 bands
+            """)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    input_image = gr.File(
+                        label="Upload Satellite Image (GeoTIFF)",
+                        file_types=[".tif", ".tiff"]
+                    )
+                    # Sample images section
+                    gr.Markdown("### Sample Images")
+                    # Sample buttons container
+                    sample_buttons = []
+                    # First row - sample thumbnails side by side horizontally
+                    with gr.Row():
+                        for name, thumbnail in sample_thumbnails.items():
+                            with gr.Column():
+                                gr.Image(
+                                    value=thumbnail,
+                                    label=name.replace("input_", "Input ").replace("chip_", "Chip "),
+                                    show_download_button=False,
+                                    height=180
+                                )
+                    # Second row - buttons side by side horizontally, matching the thumbnails above
+                    with gr.Row():
+                        for name, _ in sample_thumbnails.items():
+                            with gr.Column():
+                                sample_btn = gr.Button(
+                                    f"Use {name.replace('input_', 'Input ').replace('chip_', 'Chip ')}",
+                                    variant="secondary",
+                                    size="lg"
+                                )
+                                sample_buttons.append((sample_btn, name))
+                    # Generate button at the bottom
+                    generate_btn = gr.Button("Generate Biomass Prediction", variant="primary", size="lg")
+                with gr.Column(scale=2):
+                    output_image = gr.Image(
+                        label="Biomass Prediction Map",
+                        type="pil"
+                    )
+                    output_stats = gr.Markdown(
+                        label="Statistics"
+                    )_image = gr.Image(
+                        label="Biomass Prediction Map",
+                        type="pil"
+                    )
+                    output_stats = gr.Markdown(
+                        label="Statistics"
+                    )
+            # Sample images section with thumbnails in a separate row
+            gr.Markdown("### Sample Images")
+            with gr.Row():
+                # Only show thumbnails for images that were found
+                sample_buttons = []
+                # Create a column for each sample image
+                for name, thumbnail in sample_thumbnails.items():
+                    with gr.Column():
+                        gr.Image(value=thumbnail, label=name.replace("input_", "Input ").replace("chip_", "Chip "),
+                                show_download_button=False, show_label=True, height=200)
+                        sample_btn = gr.Button(f"Use {name.replace('input_', 'Input ').replace('chip_', 'Chip ')}",
+                                            size="lg", variant="secondary")
+                        sample_buttons.append((sample_btn, name))
+                with gr.Column(scale=2):
+                    output_image = gr.Image(
+                        label="Biomass Prediction Map",
+                        type="pil"
+                    )
+                    output_stats = gr.Markdown(
+                        label="Statistics"
+                    )
+            with gr.Accordion("About", open=False):
+                gr.Markdown("""
+                ## About This Model
+                This biomass prediction model uses the StableResNet architecture to predict above-ground biomass from satellite imagery.
+                ### Model Details
+                - Architecture: StableResNet
+                - Input: Multi-spectral satellite imagery
+                - Output: Above-ground biomass (Mg/ha)
+                - Creator: vertify.earth for GIZ Forest Forward
+                - Date: 2025-05-19
+                ### How It Works
+                1. The model extracts features from each pixel in the satellite image
+                2. These features include spectral bands, vegetation indices, texture metrics, and more
+                3. The model outputs a biomass prediction for each pixel
+                4. Results are visualized as RGB and biomass prediction side-by-side
+                """)
+            # Add a warning if model failed to load
+            if self.model is None:
+                gr.Warning("⚠️ Model failed to load. The app may not work correctly. Check logs for details.")
+            # Connect the process button
+            process_btn.click(
+                fn=self.predict_biomass,
+                inputs=[input_image],
+                outputs=[output_image, output_stats]
+            )
+            # Connect the sample buttons
+            for button, name in sample_buttons:
+                button.click(
+                    fn=lambda path=self.sample_images[name]: self.predict_biomass(path),
+                    inputs=[],
+                    outputs=[output_image, output_stats]
+                )
+        return interface
+def launch_app():
+    """Launch the Gradio app"""
+    try:
+        # Create app instance
+        app = BiomassPredictorApp()
+        # Create interface
+        interface = app.create_interface()
+        # Launch interface
+        interface.launch()
     except Exception as e:
+        logger.error(f"Error launching app: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+# Script entry point: start the app only when this file is executed directly.
+if __name__ == "__main__":
+    launch_app()