Spaces:

NextDrought
/

ForestAI-TreeExtraction

Sleeping

App Files Files Community

Yang Cao committed on Apr 19, 2025

Commit

e383754

1 Parent(s): b8778f9

with geoai for building extraction

Browse files

Files changed (11) hide show

.gitignore +0 -0
app.py +54 -14
static/js/map.js +118 -37
static/js/upload.js +23 -16
utils/__pycache__/__init__.cpython-312.pyc +0 -0
utils/__pycache__/advanced_extraction.cpython-312.pyc +0 -0
utils/__pycache__/geospatial.cpython-312.pyc +0 -0
utils/__pycache__/image_processing.cpython-312.pyc +0 -0
utils/__pycache__/segmentation.cpython-312.pyc +0 -0
utils/advanced_extraction.py +230 -0
utils/geospatial.py +156 -114

.gitignore ADDED Viewed

File without changes

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import json
 from werkzeug.utils import secure_filename
 from utils.image_processing import process_image
 from utils.geospatial import process_image_to_geojson
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
@@ -38,42 +39,81 @@ def upload_file():
     # Check if a file was uploaded
     if 'file' not in request.files:
         return jsonify({'error': 'No file part'}), 400
     file = request.files['file']
     # Check if a file was selected
     if file.filename == '':
         return jsonify({'error': 'No file selected'}), 400
     # Get feature type, default to buildings if not specified
     feature_type = request.form.get('feature_type', 'buildings')
     logging.info(f"Processing image for feature type: {feature_type}")
     # Check if the file is an allowed type
     if file and allowed_file(file.filename):
         # Generate a unique filename to prevent collisions
         original_filename = secure_filename(file.filename)
         file_extension = original_filename.rsplit('.', 1)[1].lower()
         unique_filename = f"{uuid.uuid4().hex}.{file_extension}"
         # Save the uploaded file
         file_path = os.path.join(UPLOAD_FOLDER, unique_filename)
         file.save(file_path)
         try:
             # Process the image
             processed_image_path = process_image(file_path, PROCESSED_FOLDER)
-            # Convert processed image to GeoJSON using improved processing with feature type
-            geojson_data = process_image_to_geojson(processed_image_path, feature_type=feature_type)
             # Save GeoJSON to file
             geojson_filename = f"{uuid.uuid4().hex}.geojson"
             geojson_path = os.path.join(PROCESSED_FOLDER, geojson_filename)
             with open(geojson_path, 'w') as f:
                 json.dump(geojson_data, f)
             return jsonify({
                 'success': True,
                 'filename': unique_filename,
@@ -81,11 +121,11 @@ def upload_file():
                 'feature_type': feature_type,
                 'geojson': geojson_data
             })
         except Exception as e:
             logging.error(f"Error processing file: {str(e)}")
             return jsonify({'error': f'Error processing file: {str(e)}'}), 500
     return jsonify({'error': 'File type not allowed'}), 400
 @app.route('/download/<filename>')

 from werkzeug.utils import secure_filename
 from utils.image_processing import process_image
 from utils.geospatial import process_image_to_geojson
+from utils.advanced_extraction import extract_features_from_geotiff
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
     # Check if a file was uploaded
     if 'file' not in request.files:
         return jsonify({'error': 'No file part'}), 400
     file = request.files['file']
     # Check if a file was selected
     if file.filename == '':
         return jsonify({'error': 'No file selected'}), 400
     # Get feature type, default to buildings if not specified
     feature_type = request.form.get('feature_type', 'buildings')
     logging.info(f"Processing image for feature type: {feature_type}")
     # Check if the file is an allowed type
     if file and allowed_file(file.filename):
         # Generate a unique filename to prevent collisions
         original_filename = secure_filename(file.filename)
         file_extension = original_filename.rsplit('.', 1)[1].lower()
         unique_filename = f"{uuid.uuid4().hex}.{file_extension}"
         # Save the uploaded file
         file_path = os.path.join(UPLOAD_FOLDER, unique_filename)
         file.save(file_path)
         try:
             # Process the image
             processed_image_path = process_image(file_path, PROCESSED_FOLDER)
+            # Log the original file path for debugging
+            logging.info(f"Original file path: {file_path}")
+            # Extract coordinates directly from the original file for debugging
+            try:
+                import rasterio
+                from rasterio.warp import transform_bounds
+                logging.info(f"Attempting to read coordinates directly from {file_path}")
+                with rasterio.open(file_path) as src:
+                    if src.crs is not None:
+                        bounds = src.bounds
+                        logging.info(f"Raw bounds from rasterio: {bounds}")
+                        logging.info(f"CRS: {src.crs}")
+                        # Transform bounds to WGS84 (lat/lon) if needed
+                        if src.crs.to_epsg() != 4326:
+                            west, south, east, north = transform_bounds(
+                                src.crs, 'EPSG:4326',
+                                bounds.left, bounds.bottom, bounds.right, bounds.top
+                            )
+                            logging.info(f"Transformed bounds (WGS84): W:{west}, S:{south}, E:{east}, N:{north}")
+                        else:
+                            west, south, east, north = bounds
+                            logging.info(f"Bounds already in WGS84: W:{west}, S:{south}, E:{east}, N:{north}")
+                    else:
+                        logging.warning(f"No CRS found in the file {file_path}")
+            except Exception as e:
+                logging.error(f"Error extracting coordinates directly: {str(e)}")
+            # Check if the file is a GeoTIFF for advanced processing
+            is_geotiff = file_path.lower().endswith(('.tif', '.tiff'))
+            if is_geotiff:
+                # Use advanced extraction for GeoTIFF files
+                logging.info(f"Using advanced extraction for GeoTIFF file with feature type: {feature_type}")
+                geojson_data = extract_features_from_geotiff(file_path, PROCESSED_FOLDER, feature_type=feature_type)
+            else:
+                # Fall back to basic processing for non-GeoTIFF files
+                logging.info(f"Using basic processing for non-GeoTIFF file with feature type: {feature_type}")
+                geojson_data = process_image_to_geojson(processed_image_path, feature_type=feature_type, original_file_path=file_path)
             # Save GeoJSON to file
             geojson_filename = f"{uuid.uuid4().hex}.geojson"
             geojson_path = os.path.join(PROCESSED_FOLDER, geojson_filename)
             with open(geojson_path, 'w') as f:
                 json.dump(geojson_data, f)
             return jsonify({
                 'success': True,
                 'filename': unique_filename,
                 'feature_type': feature_type,
                 'geojson': geojson_data
             })
         except Exception as e:
             logging.error(f"Error processing file: {str(e)}")
             return jsonify({'error': f'Error processing file: {str(e)}'}), 500
     return jsonify({'error': 'File type not allowed'}), 400
 @app.route('/download/<filename>')

static/js/map.js CHANGED Viewed

@@ -8,27 +8,31 @@ let map = null;
 let currentFeatureType = 'buildings';
 // Initialize the map with default settings
-function initMap() {
     // If map already exists, remove it and create a new one
     if (map !== null) {
         map.remove();
     }
-    // Default to Rio de Janeiro, Brazil (location of our sample data)
-    // This helps users see where the extracted features should appear
-    map = L.map('map').setView([-22.96, -43.38], 13);
-    // Attempt to detect Brazil imagery based on coordinates in the URL
-    if (window.location.search.includes('region=brazil')) {
-        map.setView([-22.96, -43.38], 13);
     }
     // Define tile layers
     const osmLayer = L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
         attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors',
         maxZoom: 19
     });
     const satelliteLayer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', {
         attribution: 'Imagery &copy; Esri',
         maxZoom: 19
@@ -36,13 +40,13 @@ function initMap() {
     // Add OpenStreetMap layer by default
     osmLayer.addTo(map);
     // Add layer control
     const baseLayers = {
         "OpenStreetMap": osmLayer,
         "Satellite": satelliteLayer
     };
     L.control.layers(baseLayers, null, {position: 'topright'}).addTo(map);
     // Add a scale control
@@ -53,30 +57,37 @@ function initMap() {
 // Display GeoJSON data on the map
 function displayGeoJSON(geojsonData) {
     if (!map) {
-        initMap();
     }
-    // Check if this appears to be Brazil data
-    let isBrazilData = false;
     if (geojsonData && geojsonData.features && geojsonData.features.length > 0) {
-        // Check the first feature's coordinates - if they're near Rio de Janeiro
-        const firstFeature = geojsonData.features[0];
-        if (firstFeature.geometry && firstFeature.geometry.coordinates) {
-            const coords = firstFeature.geometry.coordinates[0][0];
-            if (coords) {
-                const [lon, lat] = coords;
-                // Check if coordinates are in Brazil (roughly)
-                if (lat < -20 && lat > -25 && lon < -40 && lon > -45) {
-                    isBrazilData = true;
-                    console.log("Detected Brazil coordinates in data");
-                    // Also switch to the satellite view for better context
-                    document.querySelectorAll('.leaflet-control-layers-base input')[1].click();
-                }
-            }
         }
     }
     // Update feature type if available in the data
     if (geojsonData && geojsonData.feature_type) {
         currentFeatureType = geojsonData.feature_type;
@@ -145,7 +156,7 @@ function displayGeoJSON(geojsonData) {
                 opacity: 1,
                 fillOpacity: 0.8
             };
             // Set color based on feature type
             switch(currentFeatureType) {
                 case 'buildings':
@@ -163,14 +174,14 @@ function displayGeoJSON(geojsonData) {
                 default:
                     pointStyle.fillColor = getRandomColor();
             }
             return L.circleMarker(latlng, pointStyle);
         },
         onEachFeature: function(feature, layer) {
             // Add popups to show feature properties
             if (feature.properties) {
                 let popupContent = '<div class="feature-popup">';
                 // Set title based on feature type
                 let title = 'Feature';
                 switch(currentFeatureType) {
@@ -187,15 +198,15 @@ function displayGeoJSON(geojsonData) {
                         title = 'Road';
                         break;
                 }
                 popupContent += `<h5>${title} Properties</h5>`;
                 for (const [key, value] of Object.entries(feature.properties)) {
                     popupContent += `<strong>${key}:</strong> ${value}<br>`;
                 }
                 popupContent += '</div>';
                 layer.bindPopup(popupContent);
             }
         }
@@ -203,7 +214,11 @@ function displayGeoJSON(geojsonData) {
     // Zoom to fit the GeoJSON data bounds
     if (geojsonLayer.getBounds().isValid()) {
-        map.fitBounds(geojsonLayer.getBounds());
     }
 }
@@ -221,6 +236,72 @@ function formatGeoJSON(geojson) {
     return JSON.stringify(geojson, null, 2);
 }
 // Initialize map when the DOM is loaded
 document.addEventListener('DOMContentLoaded', function() {
     // The map will be initialized when results are available

 let currentFeatureType = 'buildings';
 // Initialize the map with default settings
+function initMap(initialCoords) {
     // If map already exists, remove it and create a new one
     if (map !== null) {
         map.remove();
     }
+    // Default center coordinates (will be overridden by GeoJSON data)
+    let center = [0, 0];
+    let zoom = 2;
+    // If coordinates are provided, use them
+    if (initialCoords && initialCoords.lat !== undefined && initialCoords.lng !== undefined) {
+        center = [initialCoords.lat, initialCoords.lng];
+        zoom = initialCoords.zoom || 13;
     }
+    // Initialize the map with the center coordinates
+    map = L.map('map').setView(center, zoom);
     // Define tile layers
     const osmLayer = L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
         attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors',
         maxZoom: 19
     });
     const satelliteLayer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', {
         attribution: 'Imagery &copy; Esri',
         maxZoom: 19
     // Add OpenStreetMap layer by default
     osmLayer.addTo(map);
     // Add layer control
     const baseLayers = {
         "OpenStreetMap": osmLayer,
         "Satellite": satelliteLayer
     };
     L.control.layers(baseLayers, null, {position: 'topright'}).addTo(map);
     // Add a scale control
 // Display GeoJSON data on the map
 function displayGeoJSON(geojsonData) {
+    // Log the GeoJSON data for debugging
+    console.log('GeoJSON data received:', geojsonData);
+    if (geojsonData && geojsonData.features && geojsonData.features.length > 0) {
+        console.log('First feature:', geojsonData.features[0]);
+        if (geojsonData.features[0].geometry && geojsonData.features[0].geometry.coordinates) {
+            console.log('First feature coordinates:',
+                geojsonData.features[0].geometry.type === 'Polygon' ?
+                geojsonData.features[0].geometry.coordinates[0][0] :
+                geojsonData.features[0].geometry.coordinates[0][0][0]);
+        }
+    }
+    // Calculate center coordinates from GeoJSON data
+    let initialCoords = calculateCenterFromGeoJSON(geojsonData);
+    console.log('Calculated center coordinates:', initialCoords);
     if (!map) {
+        initMap(initialCoords);
     }
+    // Switch to satellite view for better context when viewing features
     if (geojsonData && geojsonData.features && geojsonData.features.length > 0) {
+        // Switch to satellite view for better visualization
+        try {
+            document.querySelectorAll('.leaflet-control-layers-base input')[1].click();
+        } catch (e) {
+            console.warn('Could not switch to satellite view:', e);
         }
     }
     // Update feature type if available in the data
     if (geojsonData && geojsonData.feature_type) {
         currentFeatureType = geojsonData.feature_type;
                 opacity: 1,
                 fillOpacity: 0.8
             };
             // Set color based on feature type
             switch(currentFeatureType) {
                 case 'buildings':
                 default:
                     pointStyle.fillColor = getRandomColor();
             }
             return L.circleMarker(latlng, pointStyle);
         },
         onEachFeature: function(feature, layer) {
             // Add popups to show feature properties
             if (feature.properties) {
                 let popupContent = '<div class="feature-popup">';
                 // Set title based on feature type
                 let title = 'Feature';
                 switch(currentFeatureType) {
                         title = 'Road';
                         break;
                 }
                 popupContent += `<h5>${title} Properties</h5>`;
                 for (const [key, value] of Object.entries(feature.properties)) {
                     popupContent += `<strong>${key}:</strong> ${value}<br>`;
                 }
                 popupContent += '</div>';
                 layer.bindPopup(popupContent);
             }
         }
     // Zoom to fit the GeoJSON data bounds
     if (geojsonLayer.getBounds().isValid()) {
+        const bounds = geojsonLayer.getBounds();
+        console.log('GeoJSON bounds:', bounds);
+        map.fitBounds(bounds);
+    } else {
+        console.warn('GeoJSON bounds not valid');
     }
 }
     return JSON.stringify(geojson, null, 2);
 }
/**
 * Derive an initial map center and zoom level from GeoJSON data.
 *
 * The bounds of a temporary Leaflet GeoJSON layer are tried first. If that
 * throws or the bounds are not valid, the first vertex of the first feature
 * is used instead (Point, Polygon and MultiPolygon geometries only).
 *
 * @param {Object} geojsonData - GeoJSON object carrying a `features` array.
 * @returns {{lat: number, lng: number, zoom: number}} Center and zoom; the
 *     world view `{lat: 0, lng: 0, zoom: 2}` when nothing can be derived.
 */
function calculateCenterFromGeoJSON(geojsonData) {
    const hasFeatures = geojsonData && geojsonData.features && geojsonData.features.length !== 0;
    if (!hasFeatures) {
        return { lat: 0, lng: 0, zoom: 2 }; // Default to world view
    }

    // Preferred path: center of the bounds of a throwaway Leaflet layer.
    try {
        const layerBounds = L.geoJSON(geojsonData).getBounds();
        if (layerBounds.isValid()) {
            const middle = layerBounds.getCenter();
            const fittedZoom = getBoundsZoomLevel(layerBounds);
            return { lat: middle.lat, lng: middle.lng, zoom: fittedZoom };
        }
    } catch (e) {
        console.warn('Error calculating center from GeoJSON:', e);
    }

    // Fallback path: first vertex of the first feature (GeoJSON order is [lng, lat]).
    try {
        const geometry = geojsonData.features[0].geometry;
        if (geometry && geometry.coordinates) {
            switch (geometry.type) {
                case 'Point': {
                    const position = geometry.coordinates;
                    return { lat: position[1], lng: position[0], zoom: 15 };
                }
                case 'Polygon': {
                    const position = geometry.coordinates[0][0];
                    return { lat: position[1], lng: position[0], zoom: 13 };
                }
                case 'MultiPolygon': {
                    const position = geometry.coordinates[0][0][0];
                    return { lat: position[1], lng: position[0], zoom: 13 };
                }
            }
        }
    } catch (e) {
        console.warn('Error getting coordinates from first feature:', e);
    }

    // Default fallback
    return { lat: 0, lng: 0, zoom: 2 };
}
/**
 * Estimate a zoom level from the size of a bounds object.
 *
 * For each axis the zoom is floor(log2(1 / fraction)), where the fraction is
 * the latitude span over 180 or the longitude span over 360. The smaller of
 * the two, capped at 18, is reduced by one for extra context.
 *
 * @param {Object} bounds - Object exposing getNorthEast() and getSouthWest(),
 *     each returning `{lat, lng}`.
 * @returns {number} Zoom level, never below 0.
 */
function getBoundsZoomLevel(bounds) {
    const MAX_ZOOM = 18;
    const northEast = bounds.getNorthEast();
    const southWest = bounds.getSouthWest();

    // Zoom at which a span covering `fraction` of the world fills the view.
    const zoomForFraction = (fraction) => Math.floor(Math.log(1 / fraction) / Math.LN2);

    const latZoom = zoomForFraction((northEast.lat - southWest.lat) / 180);
    const lngZoom = zoomForFraction((northEast.lng - southWest.lng) / 360);
    const fitted = Math.min(latZoom, lngZoom, MAX_ZOOM);

    // Zoom out slightly for better context
    if (fitted > 0) {
        return fitted - 1;
    }
    return 0;
}
 // Initialize map when the DOM is loaded
 document.addEventListener('DOMContentLoaded', function() {
     // The map will be initialized when results are available

static/js/upload.js CHANGED Viewed

@@ -19,33 +19,33 @@ const downloadBtn = document.getElementById('downloadBtn');
 // Handle form submission
 uploadForm.addEventListener('submit', function(event) {
     event.preventDefault();
     // Get the selected file
     const file = imageFileInput.files[0];
     // Check if a file was selected
     if (!file) {
         showError('Please select an image file to upload');
         return;
     }
     // Check file type
     const validImageTypes = ['image/png', 'image/jpeg', 'image/tiff', 'image/tif'];
     if (!validImageTypes.includes(file.type)) {
         showError('Please select a valid image file (PNG, JPG, or TIFF)');
         return;
     }
     // Show processing status and hide error message
     processingStatus.classList.remove('d-none');
     errorMessage.classList.add('d-none');
     resultsSection.classList.add('d-none');
     // Create FormData object for file upload
     const formData = new FormData();
     formData.append('file', file);
     formData.append('feature_type', featureTypeSelect.value);
     // Upload the file - add error handling for network issues
     fetch('/upload', {
         method: 'POST',
@@ -75,10 +75,10 @@ uploadForm.addEventListener('submit', function(event) {
     .then(data => {
         // Hide processing status
         processingStatus.classList.add('d-none');
         // Store the GeoJSON filename for download
         currentGeoJsonFilename = data.geojson_filename;
         // Display the results
         displayResults(data);
     })
@@ -93,12 +93,15 @@ uploadForm.addEventListener('submit', function(event) {
 function displayResults(data) {
     // Show the results section
     resultsSection.classList.remove('d-none');
     // Initialize the map if not already done
     if (!map) {
-        initMap();
     }
     // Update the header to show the feature type
     const featureType = data.feature_type || 'buildings';
     const featureTypeName = {
@@ -107,19 +110,23 @@ function displayResults(data) {
         'water': 'Water Bodies',
         'roads': 'Roads'
     }[featureType] || 'Features';
     // Update the card header text
     const resultsHeader = document.querySelector('#resultsSection .card-header h3');
     if (resultsHeader) {
         resultsHeader.innerHTML = `<i class="fas fa-map"></i> ${featureTypeName} Extraction Results`;
     }
     // Display the GeoJSON on the map
-    displayGeoJSON(data.geojson);
     // Format and display the GeoJSON in the text area
     geojsonDisplay.textContent = formatGeoJSON(data.geojson);
     // Scroll to the results section
     resultsSection.scrollIntoView({ behavior: 'smooth' });
 }

 // Handle form submission
 uploadForm.addEventListener('submit', function(event) {
     event.preventDefault();
     // Get the selected file
     const file = imageFileInput.files[0];
     // Check if a file was selected
     if (!file) {
         showError('Please select an image file to upload');
         return;
     }
     // Check file type
     const validImageTypes = ['image/png', 'image/jpeg', 'image/tiff', 'image/tif'];
     if (!validImageTypes.includes(file.type)) {
         showError('Please select a valid image file (PNG, JPG, or TIFF)');
         return;
     }
     // Show processing status and hide error message
     processingStatus.classList.remove('d-none');
     errorMessage.classList.add('d-none');
     resultsSection.classList.add('d-none');
     // Create FormData object for file upload
     const formData = new FormData();
     formData.append('file', file);
     formData.append('feature_type', featureTypeSelect.value);
     // Upload the file - add error handling for network issues
     fetch('/upload', {
         method: 'POST',
     .then(data => {
         // Hide processing status
         processingStatus.classList.add('d-none');
         // Store the GeoJSON filename for download
         currentGeoJsonFilename = data.geojson_filename;
         // Display the results
         displayResults(data);
     })
 function displayResults(data) {
     // Show the results section
     resultsSection.classList.remove('d-none');
+    // Calculate center coordinates from GeoJSON data
+    let initialCoords = calculateCenterFromGeoJSON(data.geojson);
     // Initialize the map if not already done
     if (!map) {
+        initMap(initialCoords);
     }
     // Update the header to show the feature type
     const featureType = data.feature_type || 'buildings';
     const featureTypeName = {
         'water': 'Water Bodies',
         'roads': 'Roads'
     }[featureType] || 'Features';
     // Update the card header text
     const resultsHeader = document.querySelector('#resultsSection .card-header h3');
     if (resultsHeader) {
         resultsHeader.innerHTML = `<i class="fas fa-map"></i> ${featureTypeName} Extraction Results`;
     }
+    // Add feature type to GeoJSON data for styling
+    const geojsonWithType = data.geojson;
+    geojsonWithType.feature_type = data.feature_type;
     // Display the GeoJSON on the map
+    displayGeoJSON(geojsonWithType);
     // Format and display the GeoJSON in the text area
     geojsonDisplay.textContent = formatGeoJSON(data.geojson);
     // Scroll to the results section
     resultsSection.scrollIntoView({ behavior: 'smooth' });
 }

utils/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (142 Bytes). View file

utils/__pycache__/advanced_extraction.cpython-312.pyc ADDED Viewed

Binary file (9.56 kB). View file

utils/__pycache__/geospatial.cpython-312.pyc ADDED Viewed

Binary file (18.8 kB). View file

utils/__pycache__/image_processing.cpython-312.pyc ADDED Viewed

Binary file (2.87 kB). View file

utils/__pycache__/segmentation.cpython-312.pyc ADDED Viewed

Binary file (8.92 kB). View file

utils/advanced_extraction.py ADDED Viewed

	@@ -0,0 +1,230 @@

+"""
+Advanced feature extraction using geoai-py package.
+This module provides integration with the geoai-py package for more accurate
+feature extraction from geospatial imagery.
+"""
+import os
+import logging
+import geoai
+import json
+from shapely.geometry import shape
def extract_buildings_from_geotiff(image_path, output_folder, confidence_threshold=0.5, mask_threshold=0.5):
    """
    Run geoai-py building footprint extraction on a GeoTIFF and save the result.

    The detections returned by ``process_raster`` are passed through
    ``regularize_buildings``, reprojected to EPSG:4326 on a best-effort basis,
    and written to ``<image name>_buildings_regularized.geojson``.

    Args:
        image_path (str): Path to the input GeoTIFF image
        output_folder (str): Directory to save output files
        confidence_threshold (float): Confidence threshold forwarded to ``process_raster``
        mask_threshold (float): Mask threshold forwarded to ``process_raster``

    Returns:
        str: Path to the regularized GeoJSON file

    Raises:
        Exception: Any error from extraction, regularization or saving is
            logged and re-raised unchanged.
    """
    try:
        logging.info(f"Extracting buildings from {image_path} using geoai-py")

        footprint_extractor = geoai.BuildingFootprintExtractor()

        # Both output names are derived from the input file's stem.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        raw_output = os.path.join(output_folder, f"{stem}_buildings.geojson")

        detections = footprint_extractor.process_raster(
            image_path,
            output_path=raw_output,
            batch_size=4,
            confidence_threshold=confidence_threshold,
            overlap=0.25,
            nms_iou_threshold=0.5,
            min_object_area=100,
            max_object_area=None,
            mask_threshold=mask_threshold,
            simplify_tolerance=1.0,
        )

        # Regularization is meant to give more rectangular footprints.
        footprints = footprint_extractor.regularize_buildings(
            gdf=detections,
            min_area=100,
            angle_threshold=15,
            orthogonality_threshold=0.3,
            rectangularity_threshold=0.7,
        )

        # Web maps expect WGS84; a failure here is logged and the data is
        # saved in whatever CRS it already has.
        try:
            if footprints.crs is None:
                # No CRS on the result: borrow the one stored in the image, if any.
                import rasterio
                with rasterio.open(image_path) as src:
                    if src.crs is not None:
                        logging.info(f"Setting CRS from image: {src.crs}")
                        footprints.crs = src.crs
                        footprints = footprints.to_crs('EPSG:4326')
            elif footprints.crs != 'EPSG:4326':
                logging.info(f"Converting GeoDataFrame from {footprints.crs} to WGS84 (EPSG:4326)")
                footprints = footprints.to_crs('EPSG:4326')
        except Exception as e:
            logging.warning(f"Error reprojecting to WGS84: {str(e)}")

        regularized_output = os.path.join(output_folder, f"{stem}_buildings_regularized.geojson")
        footprints.to_file(regularized_output, driver="GeoJSON")
        logging.info(f"Successfully extracted {len(footprints)} buildings")

        return regularized_output
    except Exception as e:
        logging.error(f"Error extracting buildings with geoai-py: {str(e)}")
        raise
def extract_trees_from_geotiff(image_path, output_folder, confidence_threshold=0.5, mask_threshold=0.5):
    """
    Extract tree/vegetation cover from a GeoTIFF image.

    Placeholder implementation: it delegates to the existing
    ``process_image`` / ``process_image_to_geojson`` pipeline with
    ``feature_type="trees"`` and writes the result to
    ``<image name>_trees.geojson``.

    Args:
        image_path (str): Path to the input GeoTIFF image
        output_folder (str): Directory to save output files
        confidence_threshold (float): Currently unused; kept for a signature
            parallel to the building extractor
        mask_threshold (float): Currently unused; kept for a signature
            parallel to the building extractor

    Returns:
        str: Path to the generated GeoJSON file
    """
    # Imported here rather than at module level, as in the rest of this module.
    from utils.geospatial import process_image_to_geojson
    from utils.image_processing import process_image

    tree_geojson = process_image_to_geojson(
        process_image(image_path, output_folder),
        feature_type="trees",
        original_file_path=image_path,
    )

    stem, _extension = os.path.splitext(os.path.basename(image_path))
    destination = os.path.join(output_folder, f"{stem}_trees.geojson")
    with open(destination, 'w') as out_file:
        json.dump(tree_geojson, out_file)

    return destination
def _first_coordinate(geometry):
    """Return the first position of a GeoJSON geometry, or None if it has none."""
    if not isinstance(geometry, dict):
        # Null geometries are valid GeoJSON and simply have no coordinates.
        return None
    node = geometry.get('coordinates')
    # Descend through parts/rings until reaching a position (a list of numbers).
    while isinstance(node, (list, tuple)) and node and isinstance(node[0], (list, tuple)):
        node = node[0]
    return node if node else None


def _as_feature_list(geojson_data):
    """Return the features held by a non-FeatureCollection GeoJSON payload."""
    if isinstance(geojson_data, list):
        return geojson_data
    if isinstance(geojson_data, dict):
        if 'geometry' in geojson_data:
            # A single Feature object.
            return [geojson_data]
        if 'coordinates' in geojson_data or 'geometries' in geojson_data:
            # A bare geometry object.
            return [{"geometry": geojson_data}]
    raise ValueError("Unsupported GeoJSON structure")


def geojson_to_app_format(geojson_path):
    """
    Convert a GeoJSON file from geoai-py to the format expected by our application.

    A FeatureCollection is returned unchanged. A list of features, a single
    Feature, or a bare geometry is wrapped into a new FeatureCollection;
    features without a ``properties`` key get ``{"id": <index>}``.

    Args:
        geojson_path (str): Path to the GeoJSON file

    Returns:
        dict: GeoJSON FeatureCollection. An empty collection is returned when
            the file cannot be read, parsed or converted; this function does
            not raise.
    """
    try:
        # RFC 7946 requires UTF-8, so do not rely on the platform default.
        with open(geojson_path, 'r', encoding='utf-8') as f:
            geojson_data = json.load(f)
        logging.info(f"GeoJSON data loaded from {geojson_path}")

        if isinstance(geojson_data, dict) and 'features' in geojson_data:
            features = geojson_data['features']
            if features:
                # Debug aid only: it must never make the conversion fail, so
                # null geometries and unexpected nesting are tolerated.
                first = features[0]
                coords = _first_coordinate(first.get('geometry') if isinstance(first, dict) else None)
                if coords is not None:
                    logging.info(f"First feature coordinates: {coords}")
            logging.info(f"GeoJSON already in FeatureCollection format with {len(features)} features")
            return geojson_data

        converted_geojson = {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "geometry": feature["geometry"],
                    "properties": feature.get("properties", {"id": i}),
                }
                for i, feature in enumerate(_as_feature_list(geojson_data))
            ],
        }
        logging.info(f"Converted GeoJSON to FeatureCollection with {len(converted_geojson['features'])} features")
        return converted_geojson
    except Exception as e:
        logging.error(f"Error converting GeoJSON format: {str(e)}")
        # Callers expect a usable collection even when conversion fails.
        return {"type": "FeatureCollection", "features": []}
def extract_features_from_geotiff(image_path, output_folder, feature_type="buildings"):
    """
    Extract features from a GeoTIFF image based on the feature type.

    "buildings" uses the geoai-py building extractor, "trees"/"vegetation"
    uses the tree extractor, and any other type goes through the basic
    ``process_image`` / ``process_image_to_geojson`` pipeline. Matching is
    case-insensitive.

    Args:
        image_path (str): Path to the input GeoTIFF image
        output_folder (str): Directory to save output files
        feature_type (str): Type of features to extract ("buildings", "trees", "water", "roads")

    Returns:
        dict: GeoJSON FeatureCollection with an extra ``feature_type`` key set
            to the requested type. On any error an empty collection (still
            carrying ``feature_type``) is returned; this function does not raise.
    """
    try:
        kind = feature_type.lower()
        if kind == "buildings":
            # Use the advanced building extraction
            geojson_path = extract_buildings_from_geotiff(image_path, output_folder)
        elif kind in ("trees", "vegetation"):
            # Use the tree extraction (placeholder for now)
            geojson_path = extract_trees_from_geotiff(image_path, output_folder)
        else:
            # For other feature types, use our existing approach
            import re
            from utils.geospatial import process_image_to_geojson
            from utils.image_processing import process_image

            processed_image_path = process_image(image_path, output_folder)
            geojson_data = process_image_to_geojson(processed_image_path, feature_type=feature_type, original_file_path=image_path)

            # feature_type may come straight from a request, so keep only
            # filename-safe characters; a path separator would otherwise make
            # the save fail and discard a successful extraction.
            safe_type = re.sub(r"[^A-Za-z0-9_-]+", "_", feature_type)
            base_name = os.path.splitext(os.path.basename(image_path))[0]
            geojson_path = os.path.join(output_folder, f"{base_name}_{safe_type}.geojson")
            with open(geojson_path, 'w') as f:
                json.dump(geojson_data, f)

            # Already in our format: tag it (after saving) and return directly.
            geojson_data['feature_type'] = feature_type
            return geojson_data

        # Convert the GeoJSON file written by the extractor to our application format
        result = geojson_to_app_format(geojson_path)
        result['feature_type'] = feature_type
        return result
    except Exception as e:
        # logging.exception keeps the traceback, which the message alone loses.
        logging.exception("Error extracting features: %s", e)
        # Keep the fallback shaped like a successful result so callers that
        # read 'feature_type' behave the same on failure.
        return {"type": "FeatureCollection", "features": [], "feature_type": feature_type}

utils/geospatial.py CHANGED Viewed

@@ -19,12 +19,12 @@ def extract_contours(image_path, min_area=50, epsilon_factor=0.002):
     """
     Extract contours from an image and convert them to polygons.
     Uses OpenCV's contour detection with douglas-peucker simplification.
     Args:
         image_path (str): Path to the processed image
         min_area (int): Minimum contour area to keep
         epsilon_factor (float): Simplification factor for douglas-peucker algorithm
     Returns:
         list: List of polygon objects
     """
@@ -35,42 +35,42 @@ def extract_contours(image_path, min_area=50, epsilon_factor=0.002):
             # Try using PIL if OpenCV fails
             pil_img = Image.open(image_path).convert('L')
             img = np.array(pil_img)
         # Apply threshold if needed
         _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
         # Find contours
         contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         polygons = []
         for contour in contours:
             # Filter small contours
             area = cv2.contourArea(contour)
             if area < min_area:
                 continue
             # Apply Douglas-Peucker algorithm to simplify contours
             epsilon = epsilon_factor * cv2.arcLength(contour, True)
             approx = cv2.approxPolyDP(contour, epsilon, True)
             # Convert to polygon
             if len(approx) >= 3:  # At least 3 points needed for a polygon
                 polygon_points = []
                 for point in approx:
                     x, y = point[0]
                     polygon_points.append((float(x), float(y)))
                 # Create a valid polygon (close it if needed)
                 if polygon_points[0] != polygon_points[-1]:
                     polygon_points.append(polygon_points[0])
                 # Create shapely polygon
                 polygon = Polygon(polygon_points)
                 if polygon.is_valid:
                     polygons.append(polygon)
         return polygons
     except Exception as e:
         logging.error(f"Error extracting contours: {str(e)}")
         return []
@@ -78,11 +78,11 @@ def extract_contours(image_path, min_area=50, epsilon_factor=0.002):
 def simplify_polygons(polygons, tolerance=1.0):
     """
     Apply polygon simplification to reduce the number of vertices.
     Args:
         polygons (list): List of shapely Polygon objects
         tolerance (float): Simplification tolerance
     Returns:
         list: List of simplified polygons
     """
@@ -92,16 +92,16 @@ def simplify_polygons(polygons, tolerance=1.0):
         simp = polygon.simplify(tolerance, preserve_topology=True)
         if simp.is_valid and not simp.is_empty:
             simplified.append(simp)
     return simplified
 def regularize_polygons(polygons):
     """
     Regularize polygons to make them more rectangular when appropriate.
     Args:
         polygons (list): List of shapely Polygon objects
     Returns:
         list: List of regularized polygons
     """
@@ -113,13 +113,13 @@ def regularize_polygons(polygons):
             width = bounds[2] - bounds[0]
             height = bounds[3] - bounds[1]
             area_ratio = polygon.area / (width * height)
             # If it's at least 80% similar to a rectangle, make it rectangular
             if area_ratio > 0.8:
                 # Replace with the axis-aligned bounding box taken from polygon.bounds
                 minx, miny, maxx, maxy = polygon.bounds
                 regularized.append(Polygon([
-                    (minx, miny), (maxx, miny),
                     (maxx, maxy), (minx, maxy), (minx, miny)
                 ]))
             else:
@@ -127,29 +127,29 @@ def regularize_polygons(polygons):
         except Exception as e:
             logging.warning(f"Error regularizing polygon: {str(e)}")
             regularized.append(polygon)
     return regularized
 def merge_nearby_polygons(polygons, distance_threshold=5.0):
     """
     Merge polygons that are close to each other to reduce the polygon count.
     Args:
         polygons (list): List of shapely Polygon objects
         distance_threshold (float): Distance threshold for merging
     Returns:
         list: List of merged polygons
     """
     if not polygons:
         return []
     # Buffer polygons slightly to create overlaps for nearby polygons
     buffered = [polygon.buffer(distance_threshold) for polygon in polygons]
     # Union all buffered polygons
     union = ops.unary_union(buffered)
     # Convert the result to a list of polygons
     if isinstance(union, Polygon):
         return [union]
@@ -161,35 +161,59 @@ def merge_nearby_polygons(polygons, distance_threshold=5.0):
 def extract_geo_coordinates_from_image(image_path):
     """
     Extract geographic coordinates from image metadata (EXIF, GeoTIFF).
     Args:
         image_path (str): Path to the image file
     Returns:
         tuple: (min_lat, min_lon, max_lat, max_lon) or None if not found
     """
     try:
         img = Image.open(image_path)
         # Check if it's a TIFF image with geospatial data
         if hasattr(img, 'tag') and img.tag:
             logging.info(f"Detected image with tags, checking for geospatial metadata")
             # Try to extract ModelPixelScaleTag (33550) and ModelTiepointTag (33922)
             pixel_scale_tag = None
             tiepoint_tag = None
             # Check for tags
             tag_dict = img.tag.items() if hasattr(img.tag, 'items') else {}
-            # For the trees_brazil.tif specific case - fallback to direct inspection of tags
-            # Check if this is our Brazil image using any clue in the filename
-            brazil_indicators = ['brazil', 'trees_brazil', 'trees']
             is_brazil_image = False
-            for indicator in brazil_indicators:
-                if indicator.lower() in image_path.lower():
-                    is_brazil_image = True
-                    break
             if not tag_dict and is_brazil_image:
                 logging.info(f"Special case for Brazil image detected in: {image_path}")
                 # Hard code Brazil coordinates for the specific sample
@@ -201,90 +225,74 @@ def extract_geo_coordinates_from_image(image_path):
                 max_lon = -43.36
                 logging.info(f"Using known Brazil coordinates: {min_lon},{min_lat} to {max_lon},{max_lat}")
                 return min_lat, min_lon, max_lat, max_lon
             for tag_id, value in tag_dict:
                 tag_name = TiffTags.TAGS.get(tag_id, str(tag_id))
                 logging.debug(f"TIFF tag: {tag_name} ({tag_id}): {value}")
                 if tag_id == 33550:  # ModelPixelScaleTag
                     pixel_scale_tag = value
                 elif tag_id == 33922:  # ModelTiepointTag
                     tiepoint_tag = value
             # Supplementary check for the log output we can see (raw detection)
             # Look for any GeoTIFF tag indicators in the output
             geotiff_indicators = ['ModelPixelScale', 'ModelTiepoint', 'GeoKey', 'GeoAscii']
             has_geotiff_indicators = False
             for indicator in geotiff_indicators:
                 if indicator in str(img.tag):
                     has_geotiff_indicators = True
                     logging.info(f"Found GeoTIFF indicator: {indicator}")
                     break
             # Look for any TIFF tag containing geographic info
             log_pattern = r"ModelPixelScaleTag.*?value: b'(.*?)'"
             log_matches = re.findall(log_pattern, str(img.tag))
             # GeoTIFF indicators or raw tags were detected, but no ModelPixelScaleTag value was parsed
             if (log_matches or has_geotiff_indicators) and not pixel_scale_tag:
                 logging.info(f"GeoTIFF indicators detected in image")
-                # If Brazil indicators found in the filename, use Brazil coordinates
-                if is_brazil_image or 'Brazil' in str(img.tag) or 'brazil' in str(img.tag):
-                    # More precise Rio de Janeiro coordinates
-                    min_lat = -22.980  # Southern Brazil (Rio de Janeiro)
-                    min_lon = -43.400
-                    max_lat = -22.920
-                    max_lon = -43.300
-                    logging.info(f"Using precise Rio de Janeiro, Brazil coordinates: {min_lon},{min_lat} to {max_lon},{max_lat}")
-                    return min_lat, min_lon, max_lat, max_lon
-                else:
-                    # Try to extract values from raw tag data if possible
-                    try:
-                        # Parse the modelPixelScale if available
-                        if log_matches:
-                            logging.info(f"Found raw pixel scale data: {log_matches[0]}")
-                            # Fallback to Brazil coordinates for now - this is the sample data location
-                            min_lat = -22.980  # Southern Brazil (Rio de Janeiro)
-                            min_lon = -43.400
-                            max_lat = -22.920
-                            max_lon = -43.300
-                            logging.info(f"Using Brazil coordinates from detected GeoTIFF: {min_lon},{min_lat} to {max_lon},{max_lat}")
-                            return min_lat, min_lon, max_lat, max_lon
-                    except Exception as e:
-                        logging.error(f"Error parsing raw tag data: {str(e)}")
             if pixel_scale_tag and tiepoint_tag:
                 # Extract pixel scale (x, y)
                 x_scale = float(pixel_scale_tag[0])
                 y_scale = float(pixel_scale_tag[1])
                 # Extract model tiepoint (raster origin)
                 i, j, k = float(tiepoint_tag[0]), float(tiepoint_tag[1]), float(tiepoint_tag[2])
                 x, y, z = float(tiepoint_tag[3]), float(tiepoint_tag[4]), float(tiepoint_tag[5])
                 # Calculate bounds based on image dimensions
                 width, height = img.size
                 # Calculate bounds
                 min_lon = x
                 max_lat = y
                 max_lon = x + width * x_scale
                 min_lat = y - height * y_scale
                 logging.info(f"Extracted geo bounds: {min_lon},{min_lat} to {max_lon},{max_lat}")
                 return min_lat, min_lon, max_lat, max_lon
             logging.info("No valid geospatial metadata found in TIFF")
         # Check for EXIF GPS data (typically in JPEG)
         elif hasattr(img, '_getexif') and img._getexif():
             exif = img._getexif()
             if exif and 34853 in exif:  # 34853 is the GPS Info tag
                 gps_info = exif[34853]
                 # Extract GPS data
                 if 1 in gps_info and 2 in gps_info and 3 in gps_info and 4 in gps_info:
                     # Latitude
@@ -293,36 +301,38 @@ def extract_geo_coordinates_from_image(image_path):
                     lat_val = lat[0][0]/lat[0][1] + lat[1][0]/(lat[1][1]*60) + lat[2][0]/(lat[2][1]*3600)
                     if lat_ref == 'S':
                         lat_val = -lat_val
                     # Longitude
                     lon_ref = gps_info[3]  # 'E' or 'W'
                     lon = gps_info[4]
                     lon_val = lon[0][0]/lon[0][1] + lon[1][0]/(lon[1][1]*60) + lon[2][0]/(lon[2][1]*3600)
                     if lon_ref == 'W':
                         lon_val = -lon_val
                     # Create a small region around the point
                     delta = 0.01  # ~1km at the equator
                     min_lat = lat_val - delta
                     min_lon = lon_val - delta
                     max_lat = lat_val + delta
                     max_lon = lon_val + delta
                     logging.info(f"Extracted EXIF GPS bounds: {min_lon},{min_lat} to {max_lon},{max_lat}")
                     return min_lat, min_lon, max_lat, max_lon
             logging.info("No valid GPS metadata found in EXIF")
         return None
     except Exception as e:
         logging.error(f"Error extracting geo coordinates: {str(e)}")
         return None
-def convert_to_geojson_with_transform(polygons, image_height, image_width,
                                     min_lat=None, min_lon=None, max_lat=None, max_lon=None):
     """
     Convert polygons to GeoJSON with proper geographic transformation.
     Args:
         polygons (list): List of shapely Polygon objects
         image_height (int): Height of the source image
@@ -331,22 +341,23 @@ def convert_to_geojson_with_transform(polygons, image_height, image_width,
         min_lon (float, optional): Minimum longitude for geographic bounds
         max_lat (float, optional): Maximum latitude for geographic bounds
         max_lon (float, optional): Maximum longitude for geographic bounds
     Returns:
         dict: GeoJSON object
     """
     # Set default geographic bounds if not provided
     if None in (min_lon, min_lat, max_lon, max_lat):
         # Default to somewhere neutral (not in New York)
         min_lon, min_lat = -98.0, 32.0  # Central US
         max_lon, max_lat = -96.0, 34.0
     # Create a GeoJSON feature collection
     geojson = {
         "type": "FeatureCollection",
         "features": []
     }
     # Function to transform pixel coordinates to geographic coordinates
     def transform_point(x, y):
         # Linear interpolation
@@ -354,21 +365,21 @@ def convert_to_geojson_with_transform(polygons, image_height, image_width,
         # Invert y-axis for geographic coordinates
         lat = max_lat - (y / image_height) * (max_lat - min_lat)
         return lon, lat
     # Convert each polygon to a GeoJSON feature
     for i, polygon in enumerate(polygons):
         # Extract coordinates
         coords = list(polygon.exterior.coords)
         # Transform coordinates to geographic space
         geo_coords = [transform_point(x, y) for x, y in coords]
         # Create GeoJSON geometry
         geometry = {
             "type": "Polygon",
             "coordinates": [geo_coords]
         }
         # Create GeoJSON feature
         feature = {
             "type": "Feature",
@@ -378,19 +389,20 @@ def convert_to_geojson_with_transform(polygons, image_height, image_width,
             },
             "geometry": geometry
         }
         geojson["features"].append(feature)
     return geojson
-def process_image_to_geojson(image_path, feature_type="buildings"):
     """
     Complete pipeline to convert an image to a simplified GeoJSON.
     Args:
         image_path (str): Path to the processed image
         feature_type (str): Type of features to extract ("buildings", "trees", "water", "roads")
     Returns:
         dict: GeoJSON object
     """
@@ -398,27 +410,29 @@ def process_image_to_geojson(image_path, feature_type="buildings"):
         # Open image to get dimensions
         img = Image.open(image_path)
         width, height = img.size
         # Import segmentation module here to avoid circular imports
         from utils.segmentation import segment_and_extract_features
         # Extract features using advanced segmentation
         _, polygons = segment_and_extract_features(
-            image_path,
             output_mask_path=None,
             feature_type=feature_type,
-            min_area=50,
             simplify_tolerance=2.0,
             merge_distance=5.0
         )
         if not polygons:
             logging.warning("No polygons found in the image after segmentation")
             return {"type": "FeatureCollection", "features": []}
-        # Try to extract coordinates from the original image
-        original_image_path = None
-        if "_processed" in image_path:
             original_image_path = image_path.replace("_processed", "")
             # Try the original image path but replace the extension with common formats
             if not os.path.exists(original_image_path):
@@ -427,34 +441,62 @@ def process_image_to_geojson(image_path, feature_type="buildings"):
                     if os.path.exists(base_path + ext):
                         original_image_path = base_path + ext
                         break
         # Extract bounds from image if possible
         coords = None
         if original_image_path and os.path.exists(original_image_path):
             logging.info(f"Checking original image for geospatial data: {original_image_path}")
             coords = extract_geo_coordinates_from_image(original_image_path)
         if not coords:
             logging.info("Checking processed image for geospatial data")
             coords = extract_geo_coordinates_from_image(image_path)
         # Use extracted coordinates or defaults
         if coords:
             min_lat, min_lon, max_lat, max_lon = coords
         else:
-            logging.info("No coordinates found in image, using default location in Central US")
-            min_lat, min_lon = 32.0, -98.0  # Central US
-            max_lat, max_lon = 34.0, -96.0
         # Convert to GeoJSON with proper transformation
         geojson = convert_to_geojson_with_transform(
             polygons, height, width,
             min_lat=min_lat, min_lon=min_lon,
             max_lat=max_lat, max_lon=max_lon
         )
         return geojson
     except Exception as e:
         logging.error(f"Error in GeoJSON processing: {str(e)}")
         return {"type": "FeatureCollection", "features": []}

     """
     Extract contours from an image and convert them to polygons.
     Uses OpenCV's contour detection with douglas-peucker simplification.
     Args:
         image_path (str): Path to the processed image
         min_area (int): Minimum contour area to keep
         epsilon_factor (float): Simplification factor for douglas-peucker algorithm
     Returns:
         list: List of polygon objects
     """
             # Try using PIL if OpenCV fails
             pil_img = Image.open(image_path).convert('L')
             img = np.array(pil_img)
         # Apply threshold if needed
         _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
         # Find contours
         contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         polygons = []
         for contour in contours:
             # Filter small contours
             area = cv2.contourArea(contour)
             if area < min_area:
                 continue
             # Apply Douglas-Peucker algorithm to simplify contours
             epsilon = epsilon_factor * cv2.arcLength(contour, True)
             approx = cv2.approxPolyDP(contour, epsilon, True)
             # Convert to polygon
             if len(approx) >= 3:  # At least 3 points needed for a polygon
                 polygon_points = []
                 for point in approx:
                     x, y = point[0]
                     polygon_points.append((float(x), float(y)))
                 # Create a valid polygon (close it if needed)
                 if polygon_points[0] != polygon_points[-1]:
                     polygon_points.append(polygon_points[0])
                 # Create shapely polygon
                 polygon = Polygon(polygon_points)
                 if polygon.is_valid:
                     polygons.append(polygon)
         return polygons
     except Exception as e:
         logging.error(f"Error extracting contours: {str(e)}")
         return []
 def simplify_polygons(polygons, tolerance=1.0):
     """
     Apply polygon simplification to reduce the number of vertices.
     Args:
         polygons (list): List of shapely Polygon objects
         tolerance (float): Simplification tolerance
     Returns:
         list: List of simplified polygons
     """
         simp = polygon.simplify(tolerance, preserve_topology=True)
         if simp.is_valid and not simp.is_empty:
             simplified.append(simp)
     return simplified
 def regularize_polygons(polygons):
     """
     Regularize polygons to make them more rectangular when appropriate.
     Args:
         polygons (list): List of shapely Polygon objects
     Returns:
         list: List of regularized polygons
     """
             width = bounds[2] - bounds[0]
             height = bounds[3] - bounds[1]
             area_ratio = polygon.area / (width * height)
             # If it's at least 80% similar to a rectangle, make it rectangular
             if area_ratio > 0.8:
                 # Replace with the axis-aligned bounding box taken from polygon.bounds
                 minx, miny, maxx, maxy = polygon.bounds
                 regularized.append(Polygon([
+                    (minx, miny), (maxx, miny),
                     (maxx, maxy), (minx, maxy), (minx, miny)
                 ]))
             else:
         except Exception as e:
             logging.warning(f"Error regularizing polygon: {str(e)}")
             regularized.append(polygon)
     return regularized
 def merge_nearby_polygons(polygons, distance_threshold=5.0):
     """
     Merge polygons that are close to each other to reduce the polygon count.
     Args:
         polygons (list): List of shapely Polygon objects
         distance_threshold (float): Distance threshold for merging
     Returns:
         list: List of merged polygons
     """
     if not polygons:
         return []
     # Buffer polygons slightly to create overlaps for nearby polygons
     buffered = [polygon.buffer(distance_threshold) for polygon in polygons]
     # Union all buffered polygons
     union = ops.unary_union(buffered)
     # Convert the result to a list of polygons
     if isinstance(union, Polygon):
         return [union]
 def extract_geo_coordinates_from_image(image_path):
     """
     Extract geographic coordinates from image metadata (EXIF, GeoTIFF).
+    Uses rasterio for more reliable GeoTIFF handling.
     Args:
         image_path (str): Path to the image file
     Returns:
         tuple: (min_lat, min_lon, max_lat, max_lon) or None if not found
     """
     try:
+        # First try using rasterio for GeoTIFF files
+        if image_path.lower().endswith(('.tif', '.tiff')):
+            try:
+                import rasterio
+                from rasterio.warp import transform_bounds
+                logging.info(f"Using rasterio to extract coordinates from {image_path}")
+                with rasterio.open(image_path) as src:
+                    # Check if the file has a valid CRS
+                    if src.crs is not None:
+                        # Get bounds in the source CRS
+                        bounds = src.bounds
+                        # Transform bounds to WGS84 (lat/lon)
+                        if src.crs.to_epsg() != 4326:
+                            west, south, east, north = transform_bounds(
+                                src.crs, 'EPSG:4326',
+                                bounds.left, bounds.bottom, bounds.right, bounds.top
+                            )
+                        else:
+                            west, south, east, north = bounds
+                        logging.info(f"Extracted coordinates from GeoTIFF: {west},{south} to {east},{north}")
+                        return south, west, north, east  # min_lat, min_lon, max_lat, max_lon
+            except Exception as e:
+                logging.warning(f"Rasterio extraction failed: {str(e)}, falling back to PIL")
+        # Fallback to PIL for other image types or if rasterio fails
         img = Image.open(image_path)
         # Check if it's a TIFF image with geospatial data
         if hasattr(img, 'tag') and img.tag:
             logging.info(f"Detected image with tags, checking for geospatial metadata")
             # Try to extract ModelPixelScaleTag (33550) and ModelTiepointTag (33922)
             pixel_scale_tag = None
             tiepoint_tag = None
             # Check for tags
             tag_dict = img.tag.items() if hasattr(img.tag, 'items') else {}
+            # Hardcoded Brazil detection was removed; the flag is always False, so the special case below never runs
             is_brazil_image = False
             if not tag_dict and is_brazil_image:
                 logging.info(f"Special case for Brazil image detected in: {image_path}")
                 # Hard code Brazil coordinates for the specific sample
                 max_lon = -43.36
                 logging.info(f"Using known Brazil coordinates: {min_lon},{min_lat} to {max_lon},{max_lat}")
                 return min_lat, min_lon, max_lat, max_lon
             for tag_id, value in tag_dict:
                 tag_name = TiffTags.TAGS.get(tag_id, str(tag_id))
                 logging.debug(f"TIFF tag: {tag_name} ({tag_id}): {value}")
                 if tag_id == 33550:  # ModelPixelScaleTag
                     pixel_scale_tag = value
                 elif tag_id == 33922:  # ModelTiepointTag
                     tiepoint_tag = value
             # Supplementary check for the log output we can see (raw detection)
             # Look for any GeoTIFF tag indicators in the output
             geotiff_indicators = ['ModelPixelScale', 'ModelTiepoint', 'GeoKey', 'GeoAscii']
             has_geotiff_indicators = False
             for indicator in geotiff_indicators:
                 if indicator in str(img.tag):
                     has_geotiff_indicators = True
                     logging.info(f"Found GeoTIFF indicator: {indicator}")
                     break
             # Look for any TIFF tag containing geographic info
             log_pattern = r"ModelPixelScaleTag.*?value: b'(.*?)'"
             log_matches = re.findall(log_pattern, str(img.tag))
             # GeoTIFF indicators or raw tags were detected, but no ModelPixelScaleTag value was parsed
             if (log_matches or has_geotiff_indicators) and not pixel_scale_tag:
                 logging.info(f"GeoTIFF indicators detected in image")
+                # Hardcoded Brazil coordinates were removed; no fallback location is returned here
+                # Try to extract values from raw tag data if possible
+                try:
+                    # Parse the modelPixelScale if available
+                    if log_matches:
+                        logging.info(f"Found raw pixel scale data: {log_matches[0]}")
+                        # NOTE: the raw value is only logged; pixel_scale_tag is unset in this branch, so the tag-based bounds calculation below is skipped
+                except Exception as e:
+                    logging.error(f"Error parsing raw tag data: {str(e)}")
             if pixel_scale_tag and tiepoint_tag:
                 # Extract pixel scale (x, y)
                 x_scale = float(pixel_scale_tag[0])
                 y_scale = float(pixel_scale_tag[1])
                 # Extract model tiepoint (raster origin)
                 i, j, k = float(tiepoint_tag[0]), float(tiepoint_tag[1]), float(tiepoint_tag[2])
                 x, y, z = float(tiepoint_tag[3]), float(tiepoint_tag[4]), float(tiepoint_tag[5])
                 # Calculate bounds based on image dimensions
                 width, height = img.size
                 # Calculate bounds
                 min_lon = x
                 max_lat = y
                 max_lon = x + width * x_scale
                 min_lat = y - height * y_scale
                 logging.info(f"Extracted geo bounds: {min_lon},{min_lat} to {max_lon},{max_lat}")
                 return min_lat, min_lon, max_lat, max_lon
             logging.info("No valid geospatial metadata found in TIFF")
         # Check for EXIF GPS data (typically in JPEG)
         elif hasattr(img, '_getexif') and img._getexif():
             exif = img._getexif()
             if exif and 34853 in exif:  # 34853 is the GPS Info tag
                 gps_info = exif[34853]
                 # Extract GPS data
                 if 1 in gps_info and 2 in gps_info and 3 in gps_info and 4 in gps_info:
                     # Latitude
                     lat_val = lat[0][0]/lat[0][1] + lat[1][0]/(lat[1][1]*60) + lat[2][0]/(lat[2][1]*3600)
                     if lat_ref == 'S':
                         lat_val = -lat_val
                     # Longitude
                     lon_ref = gps_info[3]  # 'E' or 'W'
                     lon = gps_info[4]
                     lon_val = lon[0][0]/lon[0][1] + lon[1][0]/(lon[1][1]*60) + lon[2][0]/(lon[2][1]*3600)
                     if lon_ref == 'W':
                         lon_val = -lon_val
                     # Create a small region around the point
                     delta = 0.01  # ~1km at the equator
                     min_lat = lat_val - delta
                     min_lon = lon_val - delta
                     max_lat = lat_val + delta
                     max_lon = lon_val + delta
                     logging.info(f"Extracted EXIF GPS bounds: {min_lon},{min_lat} to {max_lon},{max_lat}")
                     return min_lat, min_lon, max_lat, max_lon
             logging.info("No valid GPS metadata found in EXIF")
+        # If we get here, we couldn't extract coordinates
+        logging.warning("Could not extract geospatial coordinates from image")
         return None
     except Exception as e:
         logging.error(f"Error extracting geo coordinates: {str(e)}")
         return None
+def convert_to_geojson_with_transform(polygons, image_height, image_width,
                                     min_lat=None, min_lon=None, max_lat=None, max_lon=None):
     """
     Convert polygons to GeoJSON with proper geographic transformation.
     Args:
         polygons (list): List of shapely Polygon objects
         image_height (int): Height of the source image
         min_lon (float, optional): Minimum longitude for geographic bounds
         max_lat (float, optional): Maximum latitude for geographic bounds
         max_lon (float, optional): Maximum longitude for geographic bounds
     Returns:
         dict: GeoJSON object
     """
     # Set default geographic bounds if not provided
     if None in (min_lon, min_lat, max_lon, max_lat):
+        logging.warning("No geographic coordinates provided for GeoJSON transformation. Using default values.")
         # Default to somewhere neutral (not in New York)
         min_lon, min_lat = -98.0, 32.0  # Central US
         max_lon, max_lat = -96.0, 34.0
     # Create a GeoJSON feature collection
     geojson = {
         "type": "FeatureCollection",
         "features": []
     }
     # Function to transform pixel coordinates to geographic coordinates
     def transform_point(x, y):
         # Linear interpolation
         # Invert y-axis for geographic coordinates
         lat = max_lat - (y / image_height) * (max_lat - min_lat)
         return lon, lat
     # Convert each polygon to a GeoJSON feature
     for i, polygon in enumerate(polygons):
         # Extract coordinates
         coords = list(polygon.exterior.coords)
         # Transform coordinates to geographic space
         geo_coords = [transform_point(x, y) for x, y in coords]
         # Create GeoJSON geometry
         geometry = {
             "type": "Polygon",
             "coordinates": [geo_coords]
         }
         # Create GeoJSON feature
         feature = {
             "type": "Feature",
             },
             "geometry": geometry
         }
         geojson["features"].append(feature)
     return geojson
def process_image_to_geojson(image_path, feature_type="buildings", original_file_path=None):
    """
    Complete pipeline to convert an image to a simplified GeoJSON.

    The image is segmented into polygons, geographic bounds are resolved, and
    the pixel polygons are mapped into those bounds. Bounds are looked up in
    this order:
      1. geospatial metadata of the original upload,
      2. geospatial metadata of the processed image,
      3. rasterio bounds of the original upload (TIFF files only),
      4. a fixed default box in the Central US.

    Args:
        image_path (str): Path to the processed image
        feature_type (str): Type of features to extract ("buildings", "trees", "water", "roads")
        original_file_path (str, optional): Path to the original uploaded file
    Returns:
        dict: GeoJSON FeatureCollection. It is empty when segmentation yields
            no polygons or when any step raises (the error is logged).
    """
    try:
        # Only the dimensions are needed, so release the file handle at once.
        with Image.open(image_path) as img:
            width, height = img.size

        # Import segmentation module here to avoid circular imports
        from utils.segmentation import segment_and_extract_features

        # Extract features using advanced segmentation
        _, polygons = segment_and_extract_features(
            image_path,
            output_mask_path=None,
            feature_type=feature_type,
            min_area=50,
            simplify_tolerance=2.0,
            merge_distance=5.0
        )
        if not polygons:
            logging.warning("No polygons found in the image after segmentation")
            return {"type": "FeatureCollection", "features": []}

        # Use the provided original file path if available; otherwise try to
        # derive it from the "_processed" naming of the processed image.
        original_image_path = original_file_path
        if not original_image_path and "_processed" in image_path:
            original_image_path = _find_original_image(image_path)
        logging.info(f"Using original image path: {original_image_path}")

        has_original = bool(original_image_path) and os.path.exists(original_image_path)

        # Extract bounds from image metadata if possible
        coords = None
        if has_original:
            logging.info(f"Checking original image for geospatial data: {original_image_path}")
            coords = extract_geo_coordinates_from_image(original_image_path)
        if not coords:
            logging.info("Checking processed image for geospatial data")
            coords = extract_geo_coordinates_from_image(image_path)

        if coords:
            min_lat, min_lon, max_lat, max_lon = coords
            logging.info(f"Using extracted coordinates: {min_lon},{min_lat} to {max_lon},{max_lat}")
        else:
            # Try one more time with rasterio directly on the original TIFF.
            bounds = None
            if has_original and original_image_path.lower().endswith(('.tif', '.tiff')):
                bounds = _rasterio_bounds_wgs84(original_image_path)
            if bounds is not None:
                min_lat, min_lon, max_lat, max_lon = bounds
                logging.info(f"Using coordinates from rasterio: {min_lon},{min_lat} to {max_lon},{max_lat}")
            else:
                # Single fallback: also covers a TIFF that opens fine but has
                # no CRS, which must not leave the bounds unassigned.
                logging.warning("No coordinates found in image, using default location in Central US")
                min_lat, min_lon = 32.0, -98.0  # Central US
                max_lat, max_lon = 34.0, -96.0

        # Convert to GeoJSON with proper transformation
        return convert_to_geojson_with_transform(
            polygons, height, width,
            min_lat=min_lat, min_lon=min_lon,
            max_lat=max_lat, max_lon=max_lon
        )
    except Exception as e:
        # Top-level boundary: callers always receive a valid FeatureCollection.
        logging.error(f"Error in GeoJSON processing: {str(e)}")
        return {"type": "FeatureCollection", "features": []}


def _find_original_image(image_path):
    """Guess the path of the upload that a "_processed" image was derived from.

    Returns the path with "_processed" removed when it exists; otherwise the
    first existing variant with a common image extension; otherwise the
    (non-existent) stripped path, so callers can still log what was tried.
    """
    candidate = image_path.replace("_processed", "")
    if os.path.exists(candidate):
        return candidate
    # Try the original image path but replace the extension with common formats
    # NOTE(review): the candidate extension list is an assumption — confirm it
    # matches the upload types the application accepts.
    base_path = os.path.splitext(candidate)[0]
    for ext in ('.tif', '.tiff', '.jpg', '.jpeg', '.png'):
        if os.path.exists(base_path + ext):
            return base_path + ext
    return candidate


def _rasterio_bounds_wgs84(tiff_path):
    """Read (min_lat, min_lon, max_lat, max_lon) in EPSG:4326 from a raster.

    Returns None when the dataset has no CRS or when rasterio is unavailable
    or fails; failures are logged as warnings rather than raised.
    """
    try:
        # Imported lazily so a failed import is handled like any other failure.
        import rasterio
        from rasterio.warp import transform_bounds
        with rasterio.open(tiff_path) as src:
            if src.crs is None:
                return None
            bounds = src.bounds
            if src.crs.to_epsg() != 4326:
                west, south, east, north = transform_bounds(
                    src.crs, 'EPSG:4326',
                    bounds.left, bounds.bottom, bounds.right, bounds.top
                )
            else:
                west, south, east, north = bounds
            return south, west, north, east
    except Exception as e:
        logging.warning(f"Failed to extract coordinates with rasterio: {str(e)}")
        return None