dhammo2 commited on
Commit
617c74e
·
verified ·
1 Parent(s): 8ac92fc

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +37 -0
  2. geo_boundary_translator.py +926 -0
  3. spatial_diffusion.py +1059 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from spatial_diffusion import create_diffusion_interface
3
+ from geo_boundary_translator import create_translator_interface
4
+
5
def create_combined_interface():
    """Create the main application with tabs for different geographic tools.

    Returns:
        gr.Blocks: The assembled Gradio app containing one tab per tool.
    """
    with gr.Blocks(title="Geographic Analysis Toolkit") as combined_app:
        # Branded header banner. sanitize_html=False so the inline-styled
        # <div> below is rendered as raw HTML instead of being escaped.
        gr.Markdown(
            """
            <div style="
                background-color: #4B23C0;
                color: white;
                padding: 20px;
                text-align: left;
                font-size: 28px;
                font-weight: bold;
                margin: 0;
                border-radius: 4px;
            ">
            MOPAC | DS &nbsp;-&nbsp;🗺️ Geographic Analysis Toolkit
            </div>
            """,
            sanitize_html=False
        )

        # One tab per tool; each create_*_interface() factory builds its own
        # component tree inside the currently-open TabItem context.
        with gr.Tabs() as tabs:
            with gr.TabItem("GeoBoundary Translator"):
                create_translator_interface()

            with gr.TabItem("Spatial Diffusion Tool"):
                create_diffusion_interface()

    return combined_app
34
+
35
if __name__ == "__main__":
    # Build the combined toolkit and serve it when run as a script.
    create_combined_interface().launch()
geo_boundary_translator.py ADDED
@@ -0,0 +1,926 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import geopandas as gpd
3
+ import pandas as pd
4
+ import json
5
+ import tempfile
6
+ import os
7
+ import shutil
8
+ import matplotlib.pyplot as plt
9
+ import contextily as ctx
10
+ from matplotlib.colors import LinearSegmentedColormap
11
+ import numpy as np
12
+ import fiona
13
+ import zipfile
14
+ from typing import List, Tuple, Dict, Optional, Any, Union
15
+
16
def extract_columns_from_geo_file(file_obj, progress=None):
    """Extract column names from a geospatial file (GeoJSON or GeoPackage).

    Args:
        file_obj: Uploaded file object exposing a ``.name`` path attribute.
        progress (gr.Progress, optional): Gradio progress tracker.

    Returns:
        tuple: (list of non-geometry column names, GeoDataFrame) on success,
        or ([], None) on any failure.
    """
    temp_dir = None
    try:
        file_extension = os.path.splitext(file_obj.name)[1].lower()
        msg = f"Processing file: {file_obj.name} with extension {file_extension}"
        print(msg)
        if progress is not None:
            progress(0.1, desc=msg)

        # Work on a private temporary copy to avoid file locking issues
        # with the uploaded original.
        temp_dir = tempfile.mkdtemp()
        temp_file_path = os.path.join(temp_dir, f"temp_geo_file{file_extension}")
        shutil.copyfile(file_obj.name, temp_file_path)

        msg = f"Created temporary copy at: {temp_file_path}"
        print(msg)
        if progress is not None:
            progress(0.3, desc=msg)

        if file_extension in ('.geojson', '.json'):
            msg = "Reading GeoJSON file..."
            if progress is not None:
                progress(0.5, desc=msg)
            gdf = gpd.read_file(temp_file_path)
        elif file_extension == '.gpkg':
            # A GeoPackage may hold several layers; always read the first
            # and warn when others are being ignored.
            msg = "Reading GeoPackage layers..."
            if progress is not None:
                progress(0.5, desc=msg)
            layers = fiona.listlayers(temp_file_path)

            if not layers:
                raise ValueError("No layers found in GeoPackage.")

            gdf = gpd.read_file(temp_file_path, layer=layers[0])
            if len(layers) > 1:
                print(f"Multiple layers found in GeoPackage. Using '{layers[0]}'. Available layers: {layers}")
        else:
            raise ValueError(f"Unsupported file format: {file_extension}")

        # Report every attribute column; geometry is handled separately.
        columns = [col for col in gdf.columns if col != 'geometry']
        msg = f"Extracted columns: {columns}"
        print(msg)
        if progress is not None:
            progress(0.8, desc=msg)

        if progress is not None:
            progress(1.0, desc="File processed successfully")

        return columns, gdf

    except Exception as e:
        error_msg = f"Error extracting columns: {str(e)}"
        print(error_msg)
        if progress is not None:
            progress(1.0, desc=error_msg)
        return [], None
    finally:
        # Best-effort cleanup of the temporary copy on both success and
        # failure paths (previously the failure path used a silent bare
        # except and the success path duplicated this logic).
        if temp_dir is not None:
            try:
                shutil.rmtree(temp_dir)
            except OSError as e:
                print(f"Warning: Could not clean up temporary directory: {str(e)}")
94
+
95
def extract_columns_from_csv(file_obj, progress=None):
    """Read a CSV upload and report its column names.

    Args:
        file_obj: Uploaded file object exposing a ``.name`` path attribute.
        progress (gr.Progress, optional): Gradio progress tracker.

    Returns:
        tuple: (list of column names, DataFrame) on success, or ([], None)
        when the file cannot be read.
    """
    try:
        msg = f"Reading CSV file: {file_obj.name}"
        print(msg)
        if progress is not None:
            progress(0.2, desc=msg)

        # Load the whole CSV into memory; column order is preserved.
        frame = pd.read_csv(file_obj.name)
        names = list(frame.columns)

        msg = f"Extracted CSV columns: {names}"
        print(msg)
        if progress is not None:
            progress(1.0, desc=msg)

        return names, frame
    except Exception as e:
        error_msg = f"Error extracting columns from CSV: {str(e)}"
        print(error_msg)
        if progress is not None:
            progress(1.0, desc=error_msg)
        return [], None
121
+
122
def create_map_visualization(gdf, title, progress=None):
    """Render a GeoDataFrame to a PNG map image with an optional basemap.

    Args:
        gdf (GeoDataFrame): Geometry to plot; WGS84 is assumed if it has no CRS.
        title (str): Title drawn above the map.
        progress (gr.Progress, optional): Gradio progress tracker.

    Returns:
        str or None: Filesystem path of the saved PNG, or None on failure.
    """
    fig = None
    try:
        # Nothing to draw without data.
        if gdf is None:
            msg = "Cannot create map: GeoDataFrame is None"
            print(msg)
            if progress is not None:
                progress(1.0, desc=msg)
            return None

        msg = "Creating map visualization..."
        print(msg)
        if progress is not None:
            progress(0.2, desc=msg)

        # delete=False so the path outlives this call; Gradio serves it later.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        temp_filename = temp_file.name
        temp_file.close()

        # Web Mercator (EPSG:3857) is required for contextily basemap tiles.
        if gdf.crs is None:
            print("No CRS found in GeoDataFrame, assuming WGS84")
            gdf = gdf.set_crs("EPSG:4326")

        if progress is not None:
            progress(0.4, desc="Converting coordinate system...")

        gdf_webmerc = gdf.to_crs("EPSG:3857")

        if progress is not None:
            progress(0.6, desc="Creating plot...")

        fig, ax = plt.subplots(1, 1, figsize=(10, 8))

        # Custom purple ramp to match the app theme color (#4B23C0).
        colors = ['#f5f0ff', '#e6d9ff', '#d6c2ff', '#c7abff', '#b894ff', '#a87dff', '#9966ff', '#8a4fff', '#7b38ff', '#6c21ff', '#5d0af0', '#4B23C0']
        custom_cmap = LinearSegmentedColormap.from_list('custom_purples', colors)

        # Random value per feature purely to vary the fill colors.
        random_values = np.random.rand(len(gdf_webmerc))
        gdf_webmerc.plot(ax=ax, column=random_values, cmap=custom_cmap,
                         alpha=0.7, edgecolor='#333333', linewidth=0.5)

        if progress is not None:
            progress(0.8, desc="Adding basemap...")

        try:
            # Basemap fetch needs network access; the map is still usable
            # without it, so failures are logged and ignored.
            ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
        except Exception as e:
            print(f"Could not add basemap: {str(e)}")

        ax.set_title(title, fontsize=16, color='#4B23C0')  # Purple title to match theme
        ax.set_axis_off()
        plt.tight_layout()

        if progress is not None:
            progress(0.9, desc="Saving map image...")

        plt.savefig(temp_filename, dpi=150, bbox_inches='tight')
        plt.close(fig)
        fig = None

        if progress is not None:
            progress(1.0, desc="Map created successfully")

        return temp_filename

    except Exception as e:
        error_msg = f"Error creating map visualization: {str(e)}"
        print(error_msg)
        if progress is not None:
            progress(1.0, desc=error_msg)
        return None
    finally:
        # Fix: close the figure even when an exception interrupts plotting;
        # previously failed calls leaked open matplotlib figures.
        if fig is not None:
            plt.close(fig)
205
+
206
def calculate_areal_intersection(original_gdf, original_id, new_gdf, new_id, progress=None):
    """
    Calculate the areal intersection between two geographic datasets.

    Args:
        original_gdf (GeoDataFrame): Original geography
        original_id (str): ID column in original geography
        new_gdf (GeoDataFrame): New geography
        new_id (str): ID column in new geography
        progress (gr.Progress, optional): Gradio progress tracker

    Returns:
        DataFrame: One row per intersecting pair with overlap areas and the
        percentage of each source polygon covered. Empty DataFrame on error.
    """
    try:
        total_combinations = len(original_gdf) * len(new_gdf)
        msg = f"Calculating areal intersection between {len(original_gdf)} original areas and {len(new_gdf)} new areas..."
        print(msg)

        if progress is not None:
            progress(0, desc=msg)

        # Both layers must share a CRS for intersection areas to be meaningful.
        if original_gdf.crs != new_gdf.crs:
            crs_msg = f"Converting CRS from {original_gdf.crs} to {new_gdf.crs}"
            print(crs_msg)
            if progress is not None:
                progress(0, desc=crs_msg)
            original_gdf = original_gdf.to_crs(new_gdf.crs)

        # Fix: accumulate rows in a plain list and build the DataFrame once at
        # the end. The previous pd.concat-per-row made this loop quadratic.
        records = []
        processed = 0

        # Brute-force pairwise comparison of every original/new polygon pair.
        for idx1, row1 in original_gdf.iterrows():
            orig_id = row1[original_id]
            orig_geom = row1['geometry']
            orig_area = orig_geom.area

            for idx2, row2 in new_gdf.iterrows():
                new_id_val = row2[new_id]
                new_geom = row2['geometry']
                new_area = new_geom.area

                if orig_geom.intersects(new_geom):
                    intersection_area = orig_geom.intersection(new_geom).area

                    # Guard against zero-area degenerate geometries.
                    pct_of_original = (intersection_area / orig_area) * 100 if orig_area > 0 else 0
                    pct_of_new = (intersection_area / new_area) * 100 if new_area > 0 else 0

                    # Keep only non-trivial overlaps (>0.01% of either area).
                    if pct_of_original > 0.01 or pct_of_new > 0.01:
                        records.append({
                            'original_id': orig_id,
                            'new_id': new_id_val,
                            'area_original': orig_area,
                            'area_new': new_area,
                            'area_overlap': intersection_area,
                            'pct_of_original': pct_of_original,
                            'pct_of_new': pct_of_new,
                        })

                # Update progress after every pair.
                processed += 1
                progress_pct = processed / total_combinations

                if progress is not None:
                    progress_msg = f"Calculating intersections: {int(progress_pct*100)}% complete"
                    progress(progress_pct, desc=progress_msg)

                    # Also log to console every 10%
                    if int(progress_pct*100) % 10 == 0 and int(progress_pct*100) > 0:
                        print(f"Intersection calculation: {int(progress_pct*100)}% complete")

        overlap_df = pd.DataFrame(records, columns=['original_id', 'new_id', 'area_original', 'area_new', 'area_overlap', 'pct_of_original', 'pct_of_new'])

        complete_msg = f"Intersection calculation complete. Found {len(overlap_df)} intersections."
        print(complete_msg)
        if progress is not None:
            progress(1.0, desc=complete_msg)

        return overlap_df

    except Exception as e:
        error_msg = f"Error calculating areal intersection: {str(e)}"
        print(error_msg)
        if progress is not None:
            progress(1.0, desc=error_msg)
        return pd.DataFrame()
303
+
304
def generate_weights_matrix(overlap_df, progress=None):
    """Pivot pairwise overlap records into a weights matrix.

    Args:
        overlap_df (DataFrame): Output from calculate_areal_intersection.
        progress (gr.Progress, optional): Gradio progress tracker.

    Returns:
        DataFrame: Original IDs as rows, new IDs as columns; each cell is the
        percentage of the original area falling inside the new area, 0 where
        there is no overlap. Empty DataFrame on error.
    """
    try:
        msg = "Generating weights matrix from intersection data..."
        print(msg)
        if progress is not None:
            progress(0.1, desc=msg)

        # Cell value = share of the original area contributed to each new area.
        pivoted = overlap_df.pivot(index='original_id', columns='new_id', values='pct_of_original')
        weights_matrix = pivoted.fillna(0)

        # Sanity check: each row should total roughly 100% when the two
        # geographies cover the same region.
        row_sums = weights_matrix.sum(axis=1)
        stats_msg = f"Row sum statistics: min={row_sums.min():.2f}%, max={row_sums.max():.2f}%, mean={row_sums.mean():.2f}%"
        print(stats_msg)

        if progress is not None:
            progress(1.0, desc="Weights matrix generated successfully")

        return weights_matrix

    except Exception as e:
        error_msg = f"Error generating weights matrix: {str(e)}"
        print(error_msg)
        if progress is not None:
            progress(1.0, desc=error_msg)
        return pd.DataFrame()
345
+
346
def check_fields(original_file, original_id, new_file, new_id, stats_file=None, stats_id=None, stats_cols=None):
    """Determine which operations the current form inputs allow.

    Returns:
        tuple: (translation_ready, weights_only_ready, message)
    """
    # Weights calculation needs both geographies and their ID columns.
    have_geographies = all(
        value is not None
        for value in (original_file, original_id, new_file, new_id)
    )

    # Full translation additionally needs a statistics file, its ID column,
    # and at least one selected statistic column.
    have_stats = (stats_file is not None and stats_id is not None
                  and stats_cols is not None and len(stats_cols) > 0)

    if have_geographies and have_stats:
        return True, True, "Ready to translate statistics"
    if have_geographies:
        return False, True, "Ready to calculate weights matrix (no statistics will be translated)"
    if original_file is not None and new_file is not None:
        return False, False, "Please select ID columns"
    return False, False, "Please upload required files"
368
+
369
def calculate_weights_only(original_file, original_id, new_file, new_id, progress=None):
    """
    Calculate the weights matrix between two geographies without translating statistics.

    Args:
        original_file: File object for original geography
        original_id: ID column in original geography
        new_file: File object for new geography
        new_id: ID column in new geography
        progress (gr.Progress, optional): Gradio progress tracker

    Returns:
        Tuple of (results_visible, summary_text, zip_path, weights_path)
    """
    def _temp_path(suffix):
        # delete=False so the file survives close(); Gradio serves downloads
        # by path after this function returns.
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        path = handle.name
        handle.close()
        return path

    try:
        # Read the two geographies.
        if progress is not None:
            progress(0, desc="Reading original geography...")
        print("Reading original geography...")
        orig_columns, orig_gdf = extract_columns_from_geo_file(original_file, progress)

        if progress is not None:
            progress(0.1, desc="Reading new geography...")
        print("Reading new geography...")
        new_columns, new_gdf = extract_columns_from_geo_file(new_file, progress)

        # Pairwise polygon overlap between the two geographies.
        if progress is not None:
            progress(0.2, desc="Preparing to calculate areal intersection...")
        print("Calculating areal intersection...")
        overlap_df = calculate_areal_intersection(orig_gdf, original_id, new_gdf, new_id, progress)

        if overlap_df.empty:
            if progress is not None:
                progress(1.0, desc="Error: Could not calculate area overlap between geographies.")
            return True, "Error: Could not calculate area overlap between geographies. Check that they cover the same region.", None, None

        # Convert overlaps into the original-by-new weights matrix.
        if progress is not None:
            progress(0.9, desc="Generating weights matrix...")
        print("Generating weights matrix...")
        weights_matrix = generate_weights_matrix(overlap_df, progress)

        if weights_matrix.empty:
            if progress is not None:
                progress(1.0, desc="Error: Could not generate weights matrix.")
            return True, "Error: Could not generate weights matrix.", None, None

        # Persist outputs for download.
        if progress is not None:
            progress(0.95, desc="Saving results...")

        weights_path = _temp_path('_weights.csv')
        weights_matrix.to_csv(weights_path)
        print(f"Saved weights matrix to {weights_path}")

        # Also save the full intersection data with detailed overlap info.
        overlap_path = _temp_path('_overlap_details.csv')
        overlap_df.to_csv(overlap_path, index=False)
        print(f"Saved detailed overlap data to {overlap_path}")

        # Bundle everything into a single ZIP for the "download all" button.
        zip_path = _temp_path('.zip')
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            zipf.write(weights_path, arcname="weights_matrix.csv")
            zipf.write(overlap_path, arcname="overlap_details.csv")

        print(f"Created ZIP archive at {zip_path}")

        # Human-readable run summary shown in the results panel.
        summary = f"""
        Weights calculation complete!
        - Processed {len(orig_gdf)} original areas and {len(new_gdf)} new areas.
        - Found {len(overlap_df)} geographic intersections between areas.

        The download contains:
        - weights_matrix.csv: The weights matrix for future translations
        - overlap_details.csv: Detailed area overlap information
        """

        if progress is not None:
            progress(1.0, desc="Weights calculation complete!")

        return True, summary, zip_path, weights_path

    except Exception as e:
        print(f"Error calculating weights: {str(e)}")
        import traceback
        traceback.print_exc()
        if progress is not None:
            progress(1.0, desc=f"Error during weights calculation: {str(e)}")
        return True, f"Error during weights calculation: {str(e)}", None, None
470
+
471
def translate_statistics(original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols, progress=None):
    """
    Translate statistics from the original geography to the new geography.

    Uses areal-weighted interpolation: each statistic value is apportioned to
    new areas by the share of the original area that overlaps them.

    Args:
        original_file: File object for original geography
        original_id: ID column in original geography
        new_file: File object for new geography
        new_id: ID column in new geography
        stats_file: File object for statistics
        stats_id: ID column in statistics
        stats_cols: List of statistic columns to translate
        progress (gr.Progress, optional): Gradio progress tracker

    Returns:
        Tuple of (results_visible, summary_text, output_file_path, weights_file_path)
    """
    try:
        # Read the geographies
        if progress is not None:
            progress(0.05, desc="Reading original geography...")
        print("Reading original geography...")
        orig_columns, orig_gdf = extract_columns_from_geo_file(original_file, progress)

        if progress is not None:
            progress(0.1, desc="Reading new geography...")
        print("Reading new geography...")
        new_columns, new_gdf = extract_columns_from_geo_file(new_file, progress)

        if progress is not None:
            progress(0.15, desc="Reading statistics...")
        print("Reading statistics...")
        stats_columns, stats_df = extract_columns_from_csv(stats_file, progress)

        # Check that the stats_id exists in the statistics file
        if stats_id not in stats_df.columns:
            if progress is not None:
                progress(1.0, desc=f"Error: Statistics file does not contain column '{stats_id}'")
            return True, f"Error: Statistics file does not contain column '{stats_id}'", None, None

        # Create lookup between stats and original geography. IDs are compared
        # as strings so numeric/text ID column mismatches still match.
        if progress is not None:
            progress(0.2, desc="Creating ID lookup between statistics and original geography...")
        print("Creating ID lookup between statistics and original geography...")
        stats_ids = set(stats_df[stats_id].astype(str))
        orig_ids = set(orig_gdf[original_id].astype(str))

        # Check for matches
        matching_ids = stats_ids.intersection(orig_ids)
        # NOTE(review): missing_ids is computed but never reported anywhere —
        # presumably intended for diagnostics; confirm before removing.
        missing_ids = stats_ids - orig_ids

        match_percent = (len(matching_ids) / len(stats_ids)) * 100 if stats_ids else 0
        match_msg = f"ID match: {len(matching_ids)}/{len(stats_ids)} ({match_percent:.1f}%)"
        print(match_msg)

        # Abort early when fewer than half the statistic IDs match — a low
        # rate almost always means the wrong ID columns were selected.
        if match_percent < 50:
            warning_msg = f"Warning: Low ID match rate ({match_percent:.1f}%). Check ID column selections."
            if progress is not None:
                progress(1.0, desc=warning_msg)
            return True, warning_msg, None, None

        # Calculate areal intersection
        if progress is not None:
            progress(0.25, desc="Preparing to calculate areal intersection...")
        print("Calculating areal intersection...")
        overlap_df = calculate_areal_intersection(orig_gdf, original_id, new_gdf, new_id, progress)

        if overlap_df.empty:
            error_msg = "Error: Could not calculate area overlap between geographies. Check that they cover the same region."
            if progress is not None:
                progress(1.0, desc=error_msg)
            return True, error_msg, None, None

        # Generate weights matrix
        if progress is not None:
            progress(0.75, desc="Generating weights matrix...")
        print("Generating weights matrix...")
        weights_matrix = generate_weights_matrix(overlap_df, progress)

        if weights_matrix.empty:
            error_msg = "Error: Could not generate weights matrix."
            if progress is not None:
                progress(1.0, desc=error_msg)
            return True, error_msg, None, None

        # Save weights matrix to a CSV file (delete=False so the path
        # survives for the download buttons).
        if progress is not None:
            progress(0.8, desc="Saving weights matrix...")

        temp_weights_file = tempfile.NamedTemporaryFile(delete=False, suffix='_weights.csv')
        weights_path = temp_weights_file.name
        temp_weights_file.close()

        weights_matrix.to_csv(weights_path)
        print(f"Saved weights matrix to {weights_path}")

        # Also save the full intersection data which includes more detailed overlap information
        temp_overlap_file = tempfile.NamedTemporaryFile(delete=False, suffix='_overlap_details.csv')
        overlap_path = temp_overlap_file.name
        temp_overlap_file.close()

        overlap_df.to_csv(overlap_path, index=False)
        print(f"Saved detailed overlap data to {overlap_path}")

        # Create output dataframe with new geography IDs
        if progress is not None:
            progress(0.85, desc="Creating output dataframe...")
        print("Creating output dataframe...")
        output_df = pd.DataFrame({new_id: new_gdf[new_id]})

        # Translate each selected statistic
        total_stats = len(stats_cols)
        for i, stat_col in enumerate(stats_cols):
            stat_msg = f"Translating statistic: {stat_col} ({i+1}/{total_stats})"
            if progress is not None:
                # Per-statistic progress occupies the 0.85–0.95 band.
                progress(0.85 + (0.1 * (i / total_stats)), desc=stat_msg)
            print(stat_msg)

            # Skip (with a warning) statistics missing from the CSV.
            if stat_col not in stats_df.columns:
                print(f"Warning: Statistic column '{stat_col}' not found in statistics file.")
                continue

            # Merge stats with original geography
            merged_df = pd.merge(
                orig_gdf[[original_id]],
                stats_df[[stats_id, stat_col]],
                left_on=original_id,
                right_on=stats_id,
                how='left'
            )

            # Create a series with original IDs as the index and statistic values
            stat_series = merged_df.set_index(original_id)[stat_col]

            # Apply weights to translate statistics to new geography.
            # NOTE(review): this is a weighted SUM (not average), which is the
            # correct areal interpolation for extensive, count-like variables
            # (population, totals). Intensive variables (rates, densities)
            # would need division by total_weight — confirm intended usage.
            new_stat = {}

            for new_area_id in weights_matrix.columns:
                # Get weights for this new area
                area_weights = weights_matrix[new_area_id]

                # Calculate weighted sum
                weighted_sum = 0
                total_weight = 0

                for orig_area_id, weight in area_weights.items():
                    # Skip originals with no statistic value (NaN after merge).
                    if orig_area_id in stat_series and not pd.isna(stat_series[orig_area_id]):
                        weighted_sum += stat_series[orig_area_id] * (weight / 100)
                        total_weight += weight / 100

                # NaN marks new areas that received no contribution at all.
                if total_weight > 0:
                    new_stat[new_area_id] = weighted_sum
                else:
                    new_stat[new_area_id] = np.nan

            # Add to output dataframe
            output_df[stat_col] = output_df[new_id].map(new_stat)

        # Save translated statistics to a CSV
        if progress is not None:
            progress(0.95, desc="Saving translated statistics...")

        temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix='_translated_stats.csv')
        output_path = temp_output_file.name
        temp_output_file.close()

        output_df.to_csv(output_path, index=False)
        print(f"Saved translated statistics to {output_path}")

        # Create a ZIP file with all outputs
        zip_file = tempfile.NamedTemporaryFile(delete=False, suffix='.zip')
        zip_path = zip_file.name
        zip_file.close()

        with zipfile.ZipFile(zip_path, 'w') as zipf:
            zipf.write(output_path, arcname=f"translated_statistics.csv")
            zipf.write(weights_path, arcname=f"weights_matrix.csv")
            zipf.write(overlap_path, arcname=f"overlap_details.csv")

        print(f"Created ZIP archive at {zip_path}")

        # Create summary text shown in the results panel
        summary = f"""
        Translation complete!
        - Translated {len(stats_cols)} statistics from {len(orig_gdf)} original areas to {len(new_gdf)} new areas.
        - ID match rate: {match_percent:.1f}%
        - Found {len(overlap_df)} geographic intersections between areas.

        The download contains:
        - translated_statistics.csv: The statistics mapped to the new geography
        - weights_matrix.csv: The weights matrix for future translations
        - overlap_details.csv: Detailed area overlap information
        """

        if progress is not None:
            progress(1.0, desc="Translation complete!")

        return True, summary, zip_path, weights_path

    except Exception as e:
        print(f"Error in translation: {str(e)}")
        import traceback
        traceback.print_exc()
        if progress is not None:
            progress(1.0, desc=f"Error during translation: {str(e)}")
        return True, f"Error during translation: {str(e)}", None, None
679
+
680
+ def create_translator_interface():
681
+ with gr.Blocks() as translator_interface:
682
+ # Header
683
+ gr.Markdown("## 🗺️ GeoBoundary Translator&nbsp;-&nbsp; Translate Statistics into Different Geographies")
684
+
685
+ # Main content in three columns
686
+ with gr.Row():
687
+ # First column - Original Geography
688
+ with gr.Column(variant="panel", scale=1, min_width=300, elem_id="original-column"):
689
+ gr.Markdown("## Original Geography")
690
+ gr.Markdown("*Supported formats: GeoJSON, GeoPackage (.geojson, .json, .gpkg)*")
691
+ original_file = gr.File(
692
+ label="Upload Geographic File",
693
+ file_types=[".geojson", ".json", ".gpkg"]
694
+ )
695
+ original_id = gr.Dropdown(label="Select Unique ID Column", choices=[])
696
+ original_map = gr.Image(label="Map View", type="filepath")
697
+
698
+ # Second column - New Geography
699
+ with gr.Column(variant="panel", scale=1, min_width=300, elem_id="new-column"):
700
+ gr.Markdown("## New Geography")
701
+ gr.Markdown("*Supported formats: GeoJSON, GeoPackage (.geojson, .json, .gpkg)*")
702
+ new_file = gr.File(
703
+ label="Upload Geographic File",
704
+ file_types=[".geojson", ".json", ".gpkg"]
705
+ )
706
+ new_id = gr.Dropdown(label="Select Unique ID Column", choices=[])
707
+ new_map = gr.Image(label="Map View", type="filepath")
708
+
709
+ # Third column - Statistics and Translation
710
+ with gr.Column(variant="panel", scale=1, min_width=300, elem_id="stats-column"):
711
+ gr.Markdown("## Statistics & Translation")
712
+ stats_file = gr.File(label="Upload CSV File with Statistics (optional for weights only)", file_types=[".csv"])
713
+ stats_id = gr.Dropdown(label="Select Unique ID Column", choices=[])
714
+
715
+ # Add component for selecting statistics columns
716
+ stats_cols = gr.CheckboxGroup(label="Select Statistics Columns to Transfer", choices=[], visible=False)
717
+
718
+ # Translation controls
719
+ gr.Markdown("### Translation Controls")
720
+ with gr.Row():
721
+ translate_btn = gr.Button("Translate Statistics", variant="primary", interactive=False)
722
+ calc_weights_btn = gr.Button("Calculate Weights Only", variant="secondary", interactive=False)
723
+
724
+ # Processing indicator - just use one status text
725
+ status_text = gr.Textbox(label="Status", interactive=False, value="Ready")
726
+
727
+ # Placeholder for results
728
+ with gr.Accordion("Results", open=False, visible=False) as results_accordion:
729
+ results_summary = gr.Textbox(label="Summary", lines=5)
730
+
731
+ with gr.Row():
732
+ download_all_btn = gr.Button("Download All Files (ZIP)")
733
+ download_weights_btn = gr.Button("Download Weights Matrix")
734
+
735
+ download_output = gr.File(label="Download", visible=False)
736
+
737
+ # Connect components with their update functions
738
+ def update_orig_dropdown_choices(file_obj, progress=gr.Progress()):
739
+ if file_obj is None:
740
+ return gr.Dropdown(choices=[], value=None), None
741
+ columns, gdf = extract_columns_from_geo_file(file_obj, progress)
742
+ likely_id_cols = [col for col in columns if any(id_term in col.lower() for id_term in ['id', 'code', 'key'])]
743
+ default_value = likely_id_cols[0] if likely_id_cols else None
744
+ return gr.Dropdown(choices=columns, value=default_value), create_map_visualization(gdf, "Original Geography", progress)
745
+
746
def update_new_dropdown_choices(file_obj, progress=gr.Progress()):
    """Refresh the new-geography ID dropdown and preview map after an upload."""
    if file_obj is None:
        # No file yet: empty the dropdown and hide the map.
        return gr.Dropdown(choices=[], value=None), None

    columns, gdf = extract_columns_from_geo_file(file_obj, progress)

    # Guess a sensible default: first column that looks like an identifier.
    id_like = [c for c in columns if any(term in c.lower() for term in ('id', 'code', 'key'))]
    preselected = id_like[0] if id_like else None

    map_plot = create_map_visualization(gdf, "New Geography", progress)
    return gr.Dropdown(choices=columns, value=preselected), map_plot
753
+
754
def update_stats_dropdown_choices(file_obj, progress=gr.Progress()):
    """Populate the statistics ID dropdown and the transferable-columns checklist from a CSV."""
    if file_obj is None:
        # No CSV: reset both controls and keep the checklist hidden.
        return gr.Dropdown(choices=[], value=None), gr.CheckboxGroup(choices=[], value=[], visible=False)

    columns, df = extract_columns_from_csv(file_obj, progress)

    # Columns that look like identifiers are candidates for the join key,
    # and are excluded from the transferable statistics.
    id_like = [c for c in columns if any(term in c.lower() for term in ('id', 'code', 'key'))]
    preselected_id = id_like[0] if id_like else None

    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    stat_cols = [c for c in numeric_cols if c not in id_like]

    # Pre-tick at most the first five statistics to keep the UI manageable.
    default_selection = stat_cols[:5]

    return (
        gr.Dropdown(choices=columns, value=preselected_id),
        gr.CheckboxGroup(choices=stat_cols, value=default_selection, visible=True),
    )
764
+
765
+ original_file.change(
766
+ fn=update_orig_dropdown_choices,
767
+ inputs=original_file,
768
+ outputs=[original_id, original_map]
769
+ )
770
+
771
+ new_file.change(
772
+ fn=update_new_dropdown_choices,
773
+ inputs=new_file,
774
+ outputs=[new_id, new_map]
775
+ )
776
+
777
+ stats_file.change(
778
+ fn=update_stats_dropdown_choices,
779
+ inputs=stats_file,
780
+ outputs=[stats_id, stats_cols]
781
+ )
782
+
783
+ # Function to check fields and update button status
784
def update_button_status(original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols):
    """Enable or disable the action buttons according to which inputs are complete."""
    translation_ready, weights_only_ready, message = check_fields(
        original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols
    )

    translate_state = gr.Button(interactive=translation_ready)
    weights_state = gr.Button(interactive=weights_only_ready)

    # message goes straight into the status textbox.
    return translate_state, weights_state, message
793
+
794
+ # Connect all inputs to update button status
795
+ for component in [original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols]:
796
+ component.change(
797
+ fn=update_button_status,
798
+ inputs=[original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols],
799
+ outputs=[translate_btn, calc_weights_btn, status_text]
800
+ )
801
+
802
+ # Handlers for translation and weights calculation
803
def translate_statistics_handler(original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols, progress=gr.Progress()):
    """Run the full statistics translation and map its results onto the UI components."""
    # Delegate the heavy lifting; first element of the result is unused here.
    _, summary, zip_path, weights_path = translate_statistics(
        original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols,
        progress=progress
    )

    # NOTE(review): the last call-site outputs are inline gr.State() objects —
    # verify the zip/weights paths actually reach the download handlers.
    return (
        gr.Accordion(visible=True, open=True),        # results_accordion
        summary,                                      # results_summary
        gr.Button(visible=zip_path is not None),      # download_all_btn
        gr.Button(visible=weights_path is not None),  # download_weights_btn
        zip_path,                                     # state: ZIP path
        weights_path,                                 # state: weights path
        "Processing complete",                        # status text
    )
820
+
821
def calculate_weights_handler(original_file, original_id, new_file, new_id, progress=gr.Progress()):
    """Run the weights-only computation and map its results onto the UI components."""
    # Delegate to the core routine; first element of the result is unused here.
    _, summary, zip_path, weights_path = calculate_weights_only(
        original_file, original_id, new_file, new_id,
        progress=progress
    )

    # NOTE(review): the last call-site outputs are inline gr.State() objects —
    # verify the zip/weights paths actually reach the download handlers.
    return (
        gr.Accordion(visible=True, open=True),        # results_accordion
        summary,                                      # results_summary
        gr.Button(visible=zip_path is not None),      # download_all_btn
        gr.Button(visible=weights_path is not None),  # download_weights_btn
        zip_path,                                     # state: ZIP path
        weights_path,                                 # state: weights path
        "Processing complete",                        # status text
    )
838
+
839
+ # Connect buttons with pre-click handlers to show processing status
840
def show_processing():
    """Immediate status message displayed before a long-running job begins."""
    return "Processing started..."
842
+
843
+ translate_btn.click(
844
+ fn=show_processing,
845
+ inputs=[],
846
+ outputs=[status_text],
847
+ queue=False
848
+ ).then(
849
+ fn=translate_statistics_handler,
850
+ inputs=[original_file, original_id, new_file, new_id, stats_file, stats_id, stats_cols],
851
+ outputs=[
852
+ results_accordion,
853
+ results_summary,
854
+ download_all_btn,
855
+ download_weights_btn,
856
+ gr.State(), # For ZIP path
857
+ gr.State(), # For weights path
858
+ status_text
859
+ ]
860
+ )
861
+
862
+ calc_weights_btn.click(
863
+ fn=show_processing,
864
+ inputs=[],
865
+ outputs=[status_text],
866
+ queue=False
867
+ ).then(
868
+ fn=calculate_weights_handler,
869
+ inputs=[original_file, original_id, new_file, new_id],
870
+ outputs=[
871
+ results_accordion,
872
+ results_summary,
873
+ download_all_btn,
874
+ download_weights_btn,
875
+ gr.State(), # For ZIP path
876
+ gr.State(), # For weights path
877
+ status_text
878
+ ]
879
+ )
880
+
881
+ # Handler for download buttons
882
def download_zip(zip_path):
    """Expose the ZIP bundle via the download component when a path is available."""
    if not zip_path:
        # No file to offer: keep the download component hidden.
        return gr.File(visible=False)
    return gr.File(value=zip_path, visible=True)
886
+
887
def download_weights(weights_path):
    """Expose the weights matrix via the download component when a path is available."""
    if not weights_path:
        # Nothing to download: keep the component hidden.
        return gr.File(visible=False)
    return gr.File(value=weights_path, visible=True)
891
+
892
+ # Connect download buttons
893
+ download_all_btn.click(
894
+ fn=download_zip,
895
+ inputs=[gr.State()], # ZIP path
896
+ outputs=[download_output]
897
+ )
898
+
899
+ download_weights_btn.click(
900
+ fn=download_weights,
901
+ inputs=[gr.State()], # Weights path
902
+ outputs=[download_output]
903
+ )
904
+
905
+ # CSS for column styling
906
+ translator_interface.load(
907
+ js="""
908
+ function() {
909
+ // Set background colors for columns
910
+ var originalColumn = document.getElementById('original-column');
911
+ var newColumn = document.getElementById('new-column');
912
+ var statsColumn = document.getElementById('stats-column');
913
+
914
+ if (originalColumn) originalColumn.style.backgroundColor = '#f0f8ff'; // Light blue
915
+ if (newColumn) newColumn.style.backgroundColor = '#fff8f0'; // Light orange
916
+ if (statsColumn) statsColumn.style.backgroundColor = '#f0fff0'; // Light green
917
+ }
918
+ """
919
+ )
920
+
921
+ return translator_interface
922
+
923
+ if __name__ == "__main__":
924
+ # This allows the module to be run directly for testing
925
+ app = create_translator_interface()
926
+ app.launch()
spatial_diffusion.py ADDED
@@ -0,0 +1,1059 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from shapely.geometry import Point, Polygon
5
+ import random
6
+ import datetime
7
+ import gradio as gr
8
+ import tempfile
9
+ import os
10
+ import requests
11
+ import json
12
+ from typing import List, Tuple, Optional, Dict, Any, Union
13
+
14
def fetch_osm_exclusion_zones(bounds: Tuple[float, float, float, float], exclusion_types: List[str]) -> Optional[Any]:
    """
    Fetch exclusion zones from OpenStreetMap using the Overpass API.

    Args:
        bounds: (min_lat, min_lon, max_lat, max_lon) bounding box in WGS84 degrees.
        exclusion_types: Exclusion categories to fetch. Recognised values:
            "Water bodies", "Parks & green spaces", "Industrial areas",
            "Major roads". Unrecognised entries are ignored.

    Returns:
        GeoDataFrame (EPSG:4326) with a 'zone_type' column and polygon
        geometries, or None when nothing was selected/found or the request
        or processing failed (failures are printed, never raised).
    """
    try:
        import geopandas as gpd
        from shapely.geometry import Polygon, LineString

        overpass_url = "https://overpass-api.de/api/interpreter"

        # Shared bounding-box suffix used by every Overpass statement.
        bbox = f"({bounds[0]},{bounds[1]},{bounds[2]},{bounds[3]})"

        # Build the per-type Overpass statements.
        queries = []

        if "Water bodies" in exclusion_types:
            # Water area polygons AND linear waterways.
            queries.extend([
                f'way["natural"="water"]{bbox};',
                f'relation["natural"="water"]{bbox};',
                f'way["landuse"="reservoir"]{bbox};',
                f'way["water"="lake"]{bbox};',
                f'way["water"="pond"]{bbox};',
                # Linear waterways (rivers, streams, canals)
                f'way["waterway"="river"]{bbox};',
                f'way["waterway"="stream"]{bbox};',
                f'way["waterway"="canal"]{bbox};',
            ])

        if "Parks & green spaces" in exclusion_types:
            queries.extend([
                f'way["leisure"="park"]{bbox};',
                f'way["landuse"="forest"]{bbox};',
                f'way["landuse"="grass"]{bbox};',
                f'way["natural"="wood"]{bbox};',
            ])

        if "Industrial areas" in exclusion_types:
            queries.extend([
                f'way["landuse"="industrial"]{bbox};',
                f'way["landuse"="commercial"]{bbox};',
            ])

        if "Major roads" in exclusion_types:
            queries.append(f'way["highway"~"motorway|trunk|primary"]{bbox};')

        if not queries:
            return None

        overpass_query = f"""
        [out:json][timeout:25];
        (
        {chr(10).join(queries)}
        );
        out geom;
        """

        print(f"Fetching OSM data for exclusion zones: {exclusion_types}")

        # FIX: use POST (long queries can exceed GET URL length limits, and
        # Overpass recommends POSTing the query) and set a client-side
        # timeout — the [timeout:25] above only bounds server-side execution,
        # not a stalled connection, so without this the app could hang forever.
        response = requests.post(overpass_url, data={'data': overpass_query}, timeout=60)
        response.raise_for_status()

        data = response.json()

        if not data.get('elements'):
            print("No exclusion zones found in the specified area")
            return None

        # Approximate metres->degrees factor (1 deg latitude ~ 111320 m).
        # The linear-feature buffers below use it regardless of latitude,
        # which is an acceptable approximation for exclusion masks.
        deg_per_m = 1.0 / 111320

        polygons = []
        zone_types = []

        # NOTE(review): 'relation' elements returned by the query are skipped
        # here — only 'way' elements with inline geometry are converted.
        for element in data['elements']:
            try:
                if element['type'] != 'way' or 'geometry' not in element:
                    continue

                tags = element.get('tags', {})
                zone_type = _classify_osm_tags(tags)
                if zone_type is None:
                    continue

                coords = [(node['lon'], node['lat']) for node in element['geometry']]

                if 'waterway' in tags or 'highway' in tags:
                    # Linear features: buffer the centreline into a polygon.
                    if len(coords) < 2:
                        continue
                    try:
                        if 'waterway' in tags:
                            # Rivers ~50 m, canals ~30 m, other waterways ~20 m.
                            width_m = {'river': 50, 'canal': 30}.get(tags.get('waterway'), 20)
                        else:
                            width_m = 25  # major roads ~25 m
                        polygon = LineString(coords).buffer(width_m * deg_per_m)
                        if polygon.is_valid and polygon.area > 0:
                            polygons.append(polygon)
                            zone_types.append(zone_type)
                    except Exception as e:
                        print(f"Error buffering linear feature: {str(e)}")
                        continue
                else:
                    # Area features: close the ring if necessary, then build it.
                    if len(coords) > 2:
                        if coords[0] != coords[-1]:
                            coords.append(coords[0])
                        if len(coords) >= 4:  # a valid closed ring needs >= 4 points
                            try:
                                polygon = Polygon(coords)
                                if polygon.is_valid and polygon.area > 0:
                                    polygons.append(polygon)
                                    zone_types.append(zone_type)
                            except Exception as e:
                                print(f"Error creating polygon: {str(e)}")
                                continue

            except Exception as e:
                print(f"Error processing OSM element: {str(e)}")
                continue

        if not polygons:
            print("No valid polygons found in OSM data")
            return None

        gdf = gpd.GeoDataFrame(
            {'zone_type': zone_types},
            geometry=polygons,
            crs='EPSG:4326'
        )

        print(f"Successfully fetched {len(gdf)} exclusion zones from OpenStreetMap")
        print(f"Zone types found: {gdf['zone_type'].value_counts().to_dict()}")
        return gdf

    except ImportError:
        print("GeoPandas not available for OSM processing")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from OpenStreetMap: {str(e)}")
        return None
    except Exception as e:
        print(f"Error processing OpenStreetMap data: {str(e)}")
        return None


def _classify_osm_tags(tags: Dict[str, Any]) -> Optional[str]:
    """Map an OSM tag dict to one of this tool's exclusion-zone labels (or None)."""
    if tags.get('natural') == 'water':
        return 'Water'
    if tags.get('landuse') == 'reservoir':
        return 'Water'
    if 'water' in tags:
        return 'Water'
    if tags.get('waterway') in ('river', 'stream', 'canal'):
        return 'Water'
    if tags.get('leisure') == 'park':
        return 'Park'
    if tags.get('landuse') in ('forest', 'grass'):
        return 'Green space'
    if tags.get('natural') == 'wood':
        return 'Forest'
    if tags.get('landuse') in ('industrial', 'commercial'):
        return 'Industrial/Commercial'
    if 'highway' in tags:
        return 'Major road'
    return None
198
+
199
def calculate_bounds_from_points(input_df: pd.DataFrame, buffer_km: float = 2.0) -> Tuple[float, float, float, float]:
    """
    Return a (min_lat, min_lon, max_lat, max_lon) bounding box around the
    points in *input_df*, padded on every side by *buffer_km* kilometres.
    """
    # Rough conversion: one degree of latitude is approximately 111 km.
    pad = buffer_km / 111.0

    lat_lo, lat_hi = input_df['lat'].min(), input_df['lat'].max()
    lon_lo, lon_hi = input_df['lon'].min(), input_df['lon'].max()

    return (lat_lo - pad, lon_lo - pad, lat_hi + pad, lon_hi + pad)
216
+
217
class SpatialDiffuser:
    """
    Performs spatial diffusion: takes points with counts and scatters synthetic
    points around them according to a chosen distribution within a radius,
    optionally avoiding exclusion zones and assigning random timestamps.
    """

    def __init__(self):
        # Registry mapping distribution names to their generator methods.
        self.distribution_methods = {
            "uniform": self._uniform_distribution,
            "normal": self._normal_distribution,
            "exponential_decay": self._exponential_decay,
            "distance_weighted": self._distance_weighted
        }

    def diffuse_points(self,
                       input_data: pd.DataFrame,
                       distribution_type: str = "uniform",
                       global_radius: Optional[float] = None,
                       time_start: Optional[datetime.datetime] = None,
                       time_end: Optional[datetime.datetime] = None,
                       seed: Optional[int] = None,
                       exclusion_zones_gdf: Optional[Any] = None) -> pd.DataFrame:
        """
        Generate diffused points based on input coordinates and counts.

        Args:
            input_data: DataFrame with columns lat, lon, count and optionally radius.
            distribution_type: One of "uniform", "normal", "exponential_decay",
                "distance_weighted".
            global_radius: Radius in meters for rows without their own radius.
            time_start: Start time for temporal distribution (optional).
            time_end: End time for temporal distribution (optional).
            seed: Random seed for reproducible results.
            exclusion_zones_gdf: GeoDataFrame of polygons to keep points out of.

        Returns:
            DataFrame with columns lat, lon, source_id (plus timestamp when both
            time_start and time_end are given). The column layout is guaranteed
            even when no points are generated.

        Raises:
            ValueError: for an unknown distribution type or a missing radius.
        """
        # Seed both RNGs used by the distribution methods for reproducibility.
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)

        if distribution_type not in self.distribution_methods:
            raise ValueError(f"Distribution type '{distribution_type}' not supported. Choose from: {list(self.distribution_methods.keys())}")

        temporal = time_start is not None and time_end is not None
        all_points = []

        for idx, row in input_data.iterrows():
            # Per-row radius wins; fall back to the global one.
            # FIX: a partially filled 'radius' column yields NaN via row.get,
            # which previously bypassed global_radius and silently poisoned
            # the generated coordinates — treat NaN the same as missing.
            radius = row.get('radius', None)
            if radius is None or pd.isna(radius):
                radius = global_radius
            if radius is None:
                raise ValueError("Radius must be specified either globally or per point")

            count = int(row['count'])
            if count <= 0:
                continue

            # Generate candidate points, filtered against exclusion zones.
            new_points = self._generate_points_with_exclusions(
                lat=row['lat'],
                lon=row['lon'],
                count=count,
                radius=radius,
                distribution_type=distribution_type,
                exclusion_zones_gdf=exclusion_zones_gdf
            )

            timestamps = (self._generate_timestamps(len(new_points), time_start, time_end)
                          if temporal else None)

            for i, (p_lat, p_lon) in enumerate(new_points):
                record = {'lat': p_lat, 'lon': p_lon, 'source_id': idx}
                if temporal:
                    record['timestamp'] = timestamps[i]
                all_points.append(record)

        if not all_points:
            # FIX: pd.DataFrame([]) has no columns, which breaks downstream
            # code indexing ['lat']/['lon'] — return an empty frame with the
            # expected schema instead.
            cols = ['lat', 'lon', 'source_id'] + (['timestamp'] if temporal else [])
            return pd.DataFrame(columns=cols)

        return pd.DataFrame(all_points)

    def _generate_points_with_exclusions(self, lat: float, lon: float, count: int, radius: float,
                                         distribution_type: str, exclusion_zones_gdf: Optional[Any] = None) -> List[Tuple[float, float]]:
        """
        Generate up to `count` points around (lat, lon), rejecting candidates
        that intersect any exclusion-zone polygon. Falls back to unfiltered
        generation if GeoPandas is unavailable or processing fails.
        """
        generate = self.distribution_methods[distribution_type]

        if exclusion_zones_gdf is None or len(exclusion_zones_gdf) == 0:
            # No exclusion zones: plain generation.
            return generate(lat, lon, count, radius)

        try:
            import geopandas as gpd  # noqa: F401 — triggers the ImportError fallback
            from shapely.geometry import Point

            # Ensure exclusion zones are in WGS84 to match generated coordinates.
            if exclusion_zones_gdf.crs is None:
                exclusion_zones_gdf = exclusion_zones_gdf.set_crs('EPSG:4326')
            elif exclusion_zones_gdf.crs != 'EPSG:4326':
                exclusion_zones_gdf = exclusion_zones_gdf.to_crs('EPSG:4326')

            # Hoist the geometries once instead of calling iterrows() per
            # candidate point (the old inner loop was needlessly slow).
            zone_geoms = list(exclusion_zones_gdf.geometry)

            valid_points = []
            max_attempts = count * 10  # rejection-sampling budget (10x oversampling)
            attempts = 0

            while len(valid_points) < count and attempts < max_attempts:
                batch_size = min(count * 2, max_attempts - attempts)
                for point in generate(lat, lon, batch_size, radius):
                    if len(valid_points) >= count:
                        break
                    point_geom = Point(point[1], point[0])  # Point takes (lon, lat)
                    if not any(point_geom.intersects(g) for g in zone_geoms):
                        valid_points.append(point)
                attempts += batch_size

            if len(valid_points) < count:
                print(f"Warning: Could only generate {len(valid_points)} valid points out of {count} requested for location ({lat}, {lon}). Exclusion zones may be too restrictive.")

            return valid_points

        except ImportError:
            print("GeoPandas not available for exclusion zone processing. Generating points without exclusions.")
            return generate(lat, lon, count, radius)
        except Exception as e:
            print(f"Error processing exclusion zones: {str(e)}. Generating points without exclusions.")
            return generate(lat, lon, count, radius)

    def _uniform_distribution(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """Generate points uniformly distributed within a circle of `radius` meters."""
        points = []

        for _ in range(count):
            angle = random.uniform(0, 2 * np.pi)
            # sqrt of a uniform variate gives an areal-uniform radius
            # (without it, points cluster near the center).
            r = radius * np.sqrt(random.uniform(0, 1))

            # Polar -> Cartesian offsets in meters.
            x = r * np.cos(angle)
            y = r * np.sin(angle)

            # Meters -> approximate degrees (equirectangular approximation;
            # a projection-based approach would be more accurate).
            lat_offset = y / 111320  # 1 deg latitude ~ 111320 m
            lon_offset = x / (111320 * np.cos(np.radians(lat)))  # shrink with latitude

            points.append((lat + lat_offset, lon + lon_offset))

        return points

    def _normal_distribution(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """Generate points normally distributed around the center (denser near it)."""
        points = []

        # 3-sigma rule: ~99.7% of draws land within `radius` before rejection.
        std_dev = radius / 3

        for _ in range(count):
            # Rejection-sample until the offset falls inside the circle.
            while True:
                x = np.random.normal(0, std_dev)
                y = np.random.normal(0, std_dev)
                if np.sqrt(x**2 + y**2) <= radius:
                    break

            # Meters -> approximate degrees.
            lat_offset = y / 111320
            lon_offset = x / (111320 * np.cos(np.radians(lat)))

            points.append((lat + lat_offset, lon + lon_offset))

        return points

    def _exponential_decay(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """Generate points whose density decays exponentially with distance from center."""
        points = []

        # Rate parameter: higher value means a steeper decay.
        rate = 3.0 / radius

        for _ in range(count):
            angle = random.uniform(0, 2 * np.pi)

            # Rejection-sample the exponential distance so it stays in-radius.
            while True:
                r = random.expovariate(rate)
                if r <= radius:
                    break

            x = r * np.cos(angle)
            y = r * np.sin(angle)

            # Meters -> approximate degrees.
            lat_offset = y / 111320
            lon_offset = x / (111320 * np.cos(np.radians(lat)))

            points.append((lat + lat_offset, lon + lon_offset))

        return points

    def _distance_weighted(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """
        Generate points with a custom distance-weighted distribution: more
        points at medium distances than at the center or at the edge.
        """
        points = []

        for _ in range(count):
            angle = random.uniform(0, 2 * np.pi)

            # Beta(2, 2) peaks at 0.5, concentrating r^2 at middle distances.
            r_squared = random.betavariate(2, 2)
            r = np.sqrt(r_squared) * radius

            x = r * np.cos(angle)
            y = r * np.sin(angle)

            # Meters -> approximate degrees.
            lat_offset = y / 111320
            lon_offset = x / (111320 * np.cos(np.radians(lat)))

            points.append((lat + lat_offset, lon + lon_offset))

        return points

    def _generate_timestamps(self, count: int, start_time: datetime.datetime, end_time: datetime.datetime) -> List[datetime.datetime]:
        """Generate `count` timestamps uniformly distributed in [start_time, end_time], sorted ascending."""
        start_ts = start_time.timestamp()
        end_ts = end_time.timestamp()

        # NOTE(review): fromtimestamp() yields naive local-time datetimes —
        # confirm callers expect local time rather than UTC.
        timestamps = [
            datetime.datetime.fromtimestamp(random.uniform(start_ts, end_ts))
            for _ in range(count)
        ]

        timestamps.sort()
        return timestamps
518
+
519
+ def create_visualization(input_df, output_df, show_basemap=False, exclusion_zones_gdf=None):
520
+ """Create visualization of input and diffused points"""
521
+ fig, ax = plt.subplots(figsize=(12, 10))
522
+
523
+ # Set background color
524
+ fig.patch.set_facecolor('white')
525
+ ax.set_facecolor('#f8f9fa')
526
+
527
+ # Define colors for different exclusion zone types
528
+ exclusion_colors = {
529
+ 'Water': '#4FC3F7', # Light blue
530
+ 'Park': '#66BB6A', # Green
531
+ 'Green space': '#81C784', # Light green
532
+ 'Forest': '#4CAF50', # Dark green
533
+ 'Industrial/Commercial': '#90A4AE', # Grey
534
+ 'Major road': '#FFD54F', # Yellow
535
+ 'Other': '#FFAB91' # Light orange
536
+ }
537
+
538
+ # If basemap is requested, convert to Web Mercator and add basemap
539
+ if show_basemap:
540
+ try:
541
+ import contextily as ctx
542
+ import geopandas as gpd
543
+ from shapely.geometry import Point
544
+
545
+ # Create GeoDataFrames for proper projection
546
+ input_gdf = gpd.GeoDataFrame(
547
+ input_df,
548
+ geometry=[Point(lon, lat) for lon, lat in zip(input_df['lon'], input_df['lat'])],
549
+ crs='EPSG:4326'
550
+ )
551
+ output_gdf = gpd.GeoDataFrame(
552
+ output_df,
553
+ geometry=[Point(lon, lat) for lon, lat in zip(output_df['lon'], output_df['lat'])],
554
+ crs='EPSG:4326'
555
+ )
556
+
557
+ # Convert to Web Mercator for basemap compatibility
558
+ input_gdf_merc = input_gdf.to_crs('EPSG:3857')
559
+ output_gdf_merc = output_gdf.to_crs('EPSG:3857')
560
+
561
+ # Plot exclusion zones first (if provided) with color coding
562
+ if exclusion_zones_gdf is not None and len(exclusion_zones_gdf) > 0:
563
+ try:
564
+ exclusion_zones_merc = exclusion_zones_gdf.to_crs('EPSG:3857')
565
+
566
+ # Group by zone type and plot with appropriate colors
567
+ plotted_types = set()
568
+ for zone_type in exclusion_zones_merc['zone_type'].unique():
569
+ zone_subset = exclusion_zones_merc[exclusion_zones_merc['zone_type'] == zone_type]
570
+ color = exclusion_colors.get(zone_type, exclusion_colors['Other'])
571
+
572
+ # Only add label for first occurrence of each type
573
+ label = zone_type if zone_type not in plotted_types else None
574
+ if label:
575
+ plotted_types.add(zone_type)
576
+
577
+ zone_subset.plot(ax=ax, color=color, alpha=0.6, edgecolor='white',
578
+ linewidth=0.5, label=label)
579
+
580
+ except Exception as e:
581
+ print(f"Error plotting exclusion zones: {str(e)}")
582
+
583
+ # Extract coordinates for plotting
584
+ input_x = input_gdf_merc.geometry.x
585
+ input_y = input_gdf_merc.geometry.y
586
+ output_x = output_gdf_merc.geometry.x
587
+ output_y = output_gdf_merc.geometry.y
588
+
589
+ # Plot diffused points first (so they appear behind source points)
590
+ ax.scatter(output_x, output_y,
591
+ alpha=0.7, color='#FF9800', s=12, label=f'Generated Points (n={len(output_df)})',
592
+ edgecolors='white', linewidth=0.3)
593
+
594
+ # Draw radius circles first (so they appear behind everything else)
595
+ for idx, row in input_df.iterrows():
596
+ radius = row.get('radius', None)
597
+
598
+ if radius is not None:
599
+ # Convert center point to Web Mercator
600
+ center_point = gpd.GeoDataFrame(
601
+ [1], geometry=[Point(row['lon'], row['lat'])], crs='EPSG:4326'
602
+ ).to_crs('EPSG:3857')
603
+
604
+ center_x = center_point.geometry.x.iloc[0]
605
+ center_y = center_point.geometry.y.iloc[0]
606
+
607
+ # Draw circle (radius is already in meters, which matches Web Mercator units)
608
+ circle = plt.Circle((center_x, center_y), radius,
609
+ fill=False, color='#9C27B0', linestyle='--',
610
+ alpha=0.5, linewidth=2)
611
+ ax.add_patch(circle)
612
+
613
+ # Plot source points with circles sized by count
614
+ min_size = 100
615
+ max_size = 800
616
+ if len(input_df) > 1:
617
+ size_range = input_df['count'].max() - input_df['count'].min()
618
+ if size_range > 0:
619
+ sizes = min_size + (input_df['count'] - input_df['count'].min()) / size_range * (max_size - min_size)
620
+ else:
621
+ sizes = [min_size] * len(input_df)
622
+ else:
623
+ sizes = [max_size]
624
+
625
+ # Plot source points in purple
626
+ ax.scatter(input_x, input_y,
627
+ s=sizes, c='#9C27B0', alpha=0.9,
628
+ edgecolors='white', linewidth=2,
629
+ label='Source Points (size = count)', zorder=5)
630
+
631
+ # Add count labels next to source points
632
+ for idx, row in input_df.iterrows():
633
+ point_merc = gpd.GeoDataFrame(
634
+ [1], geometry=[Point(row['lon'], row['lat'])], crs='EPSG:4326'
635
+ ).to_crs('EPSG:3857')
636
+
637
+ x_merc = point_merc.geometry.x.iloc[0]
638
+ y_merc = point_merc.geometry.y.iloc[0]
639
+
640
+ ax.annotate(f'{int(row["count"])}',
641
+ (x_merc, y_merc),
642
+ xytext=(8, 8), textcoords='offset points',
643
+ fontsize=10, fontweight='bold', color='white',
644
+ bbox=dict(boxstyle='round,pad=0.3', facecolor='#9C27B0', alpha=0.8),
645
+ zorder=6)
646
+
647
+ # Add basemap
648
+ try:
649
+ ctx.add_basemap(ax, crs='EPSG:3857', source=ctx.providers.CartoDB.Positron, alpha=0.8)
650
+ basemap_added = True
651
+ except Exception as e:
652
+ print(f"Could not add basemap: {str(e)}")
653
+ basemap_added = False
654
+
655
+ # Set axis labels for Web Mercator
656
+ ax.set_xlabel('Easting (Web Mercator)', fontsize=12)
657
+ ax.set_ylabel('Northing (Web Mercator)', fontsize=12)
658
+
659
+ # Use projected coordinates for limits
660
+ x_coords = list(input_x) + list(output_x)
661
+ y_coords = list(input_y) + list(output_y)
662
+
663
+ except ImportError:
664
+ print("Contextily not available for basemap. Falling back to simple plot.")
665
+ show_basemap = False
666
+ except Exception as e:
667
+ print(f"Error creating basemap: {str(e)}. Falling back to simple plot.")
668
+ show_basemap = False
669
+
670
+ # Fallback to simple plot if basemap fails or is not requested
671
+ if not show_basemap:
672
+ # Plot exclusion zones first (if provided) with color coding
673
+ if exclusion_zones_gdf is not None and len(exclusion_zones_gdf) > 0:
674
+ try:
675
+ # Ensure exclusion zones are in WGS84
676
+ if exclusion_zones_gdf.crs != 'EPSG:4326':
677
+ exclusion_zones_gdf = exclusion_zones_gdf.to_crs('EPSG:4326')
678
+
679
+ # Plot zones by type with appropriate colors
680
+ plotted_types = set()
681
+ for idx, zone in exclusion_zones_gdf.iterrows():
682
+ zone_type = zone.get('zone_type', 'Other')
683
+ color = exclusion_colors.get(zone_type, exclusion_colors['Other'])
684
+
685
+ # Only add label for first occurrence of each type
686
+ label = zone_type if zone_type not in plotted_types else None
687
+ if label:
688
+ plotted_types.add(zone_type)
689
+
690
+ if zone.geometry.geom_type == 'Polygon':
691
+ x, y = zone.geometry.exterior.xy
692
+ ax.fill(x, y, color=color, alpha=0.6, edgecolor='white',
693
+ linewidth=0.5, label=label)
694
+ elif zone.geometry.geom_type == 'MultiPolygon':
695
+ for poly in zone.geometry.geoms:
696
+ x, y = poly.exterior.xy
697
+ ax.fill(x, y, color=color, alpha=0.6, edgecolor='white',
698
+ linewidth=0.5, label=label)
699
+ label = None # Only label the first polygon
700
+
701
+ except Exception as e:
702
+ print(f"Error plotting exclusion zones: {str(e)}")
703
+
704
+ # Plot diffused points first (so they appear behind source points) - orange
705
+ ax.scatter(output_df['lon'], output_df['lat'],
706
+ alpha=0.7, color='#FF9800', s=12, label=f'Generated Points (n={len(output_df)})',
707
+ edgecolors='white', linewidth=0.3)
708
+
709
+ # Draw radius circles first (so they appear behind everything else) - purple
710
+ for idx, row in input_df.iterrows():
711
+ radius = row.get('radius', None)
712
+
713
+ if radius is not None:
714
+ # Approximate conversion from meters to degrees
715
+ radius_deg_lat = radius / 111320
716
+ radius_deg_lon = radius / (111320 * np.cos(np.radians(row['lat'])))
717
+
718
+ # Use the average as an approximation
719
+ radius_deg = (radius_deg_lat + radius_deg_lon) / 2
720
+
721
+ # Draw circle in purple
722
+ circle = plt.Circle((row['lon'], row['lat']), radius_deg,
723
+ fill=False, color='#9C27B0', linestyle='--',
724
+ alpha=0.5, linewidth=2)
725
+ ax.add_patch(circle)
726
+
727
+ # Plot source points with circles sized by count - purple
728
+ min_size = 100
729
+ max_size = 800
730
+ if len(input_df) > 1:
731
+ size_range = input_df['count'].max() - input_df['count'].min()
732
+ if size_range > 0:
733
+ sizes = min_size + (input_df['count'] - input_df['count'].min()) / size_range * (max_size - min_size)
734
+ else:
735
+ sizes = [min_size] * len(input_df)
736
+ else:
737
+ sizes = [max_size]
738
+
739
+ # Plot source points in purple
740
+ ax.scatter(input_df['lon'], input_df['lat'],
741
+ s=sizes, c='#9C27B0', alpha=0.9,
742
+ edgecolors='white', linewidth=2,
743
+ label='Source Points (size = count)', zorder=5)
744
+
745
+ # Add count labels next to source points with purple background
746
+ for idx, row in input_df.iterrows():
747
+ ax.annotate(f'{int(row["count"])}',
748
+ (row['lon'], row['lat']),
749
+ xytext=(8, 8), textcoords='offset points',
750
+ fontsize=10, fontweight='bold', color='white',
751
+ bbox=dict(boxstyle='round,pad=0.3', facecolor='#9C27B0', alpha=0.8),
752
+ zorder=6)
753
+
754
+ # Standard coordinate labels
755
+ ax.set_xlabel('Longitude', fontsize=12)
756
+ ax.set_ylabel('Latitude', fontsize=12)
757
+
758
+ # Use original coordinates for limits
759
+ x_coords = list(input_df['lon']) + list(output_df['lon'])
760
+ y_coords = list(input_df['lat']) + list(output_df['lat'])
761
+
762
+ # Common styling
763
+ title = 'Spatial Diffusion Results'
764
+ if show_basemap:
765
+ title += ' (with Basemap)'
766
+ if exclusion_zones_gdf is not None and len(exclusion_zones_gdf) > 0:
767
+ title += ' - Exclusion Zones Applied'
768
+ subtitle = 'Purple source points sized by count, orange generated points, dashed circles show diffusion radius'
769
+
770
+ ax.set_title(f'{title}\n{subtitle}',
771
+ fontsize=14, fontweight='bold', pad=20)
772
+
773
+ # Legend with better positioning
774
+ legend = ax.legend(loc='upper right', bbox_to_anchor=(1, 1),
775
+ frameon=True, fancybox=True, shadow=True)
776
+ legend.get_frame().set_facecolor('white')
777
+ legend.get_frame().set_alpha(0.9)
778
+
779
+ # Add grid (lighter for basemap)
780
+ grid_alpha = 0.2 if show_basemap else 0.3
781
+ ax.grid(True, alpha=grid_alpha, linestyle='-', linewidth=0.5)
782
+
783
+ # Make equal aspect ratio
784
+ ax.set_aspect('equal', 'box')
785
+
786
+ # Add some padding around the data
787
+ x_margin = (max(x_coords) - min(x_coords)) * 0.1
788
+ y_margin = (max(y_coords) - min(y_coords)) * 0.1
789
+
790
+ if x_margin == 0: # Handle case where all points have same x-coordinate
791
+ x_margin = 1000 if show_basemap else 0.01
792
+ if y_margin == 0: # Handle case where all points have same y-coordinate
793
+ y_margin = 1000 if show_basemap else 0.01
794
+
795
+ ax.set_xlim(min(x_coords) - x_margin, max(x_coords) + x_margin)
796
+ ax.set_ylim(min(y_coords) - y_margin, max(y_coords) + y_margin)
797
+
798
+ # Tight layout
799
+ plt.tight_layout()
800
+
801
+ return fig
802
+
803
def process_csv(file_obj, distribution_type, global_radius, show_basemap, auto_exclusions, exclusion_file, include_time, time_start, time_end, seed):
    """Process input CSV and generate diffused points.

    Parameters
    ----------
    file_obj : uploaded file object with a ``.name`` path
        CSV that must contain ``lat``, ``lon`` and ``count`` columns and
        may contain a per-row ``radius`` column (meters).
    distribution_type : str
        One of the diffusion distributions understood by ``SpatialDiffuser``
        (e.g. ``"uniform"``, ``"normal"``).
    global_radius : str
        Optional global diffusion radius in meters; if blank, the CSV must
        supply a ``radius`` column instead.
    show_basemap : bool
        Whether the visualization should include a contextily basemap.
    auto_exclusions : list[str]
        OpenStreetMap feature categories to fetch automatically as
        exclusion zones (ignored when ``exclusion_file`` is given).
    exclusion_file : uploaded vector file or None
        Custom exclusion zones (GeoJSON/GPKG/SHP); takes priority over
        ``auto_exclusions``.
    include_time : bool
        Whether to distribute generated points over a time window.
    time_start, time_end : str
        Window bounds in ``YYYY-MM-DD HH:MM:SS`` format (required when
        ``include_time`` is True).
    seed : str
        Optional integer random seed for reproducible output.

    Returns
    -------
    tuple
        ``(matplotlib figure, path to generated CSV)`` on success, or
        ``(None, error message string)`` on any validation/processing error.
    """
    try:
        # Read input CSV
        df = pd.read_csv(file_obj.name)

        # Validate required columns
        required_cols = ['lat', 'lon', 'count']
        if not all(col in df.columns for col in required_cols):
            return None, f"Error: CSV must contain columns: {', '.join(required_cols)}"

        # Convert global_radius to float if provided
        if global_radius and global_radius.strip():
            try:
                global_radius = float(global_radius)
            except ValueError:
                return None, "Error: Global radius must be a number"
        else:
            global_radius = None
            # If global radius not provided, check for radius column
            if 'radius' not in df.columns:
                return None, "Error: Either provide a global radius or include a 'radius' column in the CSV"

        # Convert seed to int if provided
        if seed and seed.strip():
            try:
                seed = int(seed)
            except ValueError:
                return None, "Error: Seed must be an integer"
        else:
            seed = None

        # Process exclusion zones
        exclusion_zones_gdf = None

        # First, try manual file upload (takes priority)
        if exclusion_file is not None:
            try:
                import geopandas as gpd

                # All supported vector formats are read by the same
                # gpd.read_file call; the extension check only rejects
                # formats we do not support.
                file_extension = os.path.splitext(exclusion_file.name)[1].lower()
                if file_extension in ('.geojson', '.json', '.gpkg', '.shp'):
                    exclusion_zones_gdf = gpd.read_file(exclusion_file.name)
                else:
                    return None, f"Error: Unsupported exclusion zone file format: {file_extension}"

                # Ensure CRS is set
                if exclusion_zones_gdf.crs is None:
                    exclusion_zones_gdf = exclusion_zones_gdf.set_crs('EPSG:4326')

                print(f"Loaded {len(exclusion_zones_gdf)} custom exclusion zones from {exclusion_file.name}")

            except ImportError:
                return None, "Error: GeoPandas required for exclusion zones processing"
            except Exception as e:
                return None, f"Error reading exclusion zones file: {str(e)}"

        # If no manual file, try automatic exclusions from OpenStreetMap
        elif auto_exclusions and len(auto_exclusions) > 0:
            try:
                # Calculate bounds around input points
                bounds = calculate_bounds_from_points(df)
                print(f"Fetching automatic exclusions for bounds: {bounds}")

                # Fetch OSM data
                exclusion_zones_gdf = fetch_osm_exclusion_zones(bounds, auto_exclusions)

                if exclusion_zones_gdf is not None:
                    print(f"Fetched {len(exclusion_zones_gdf)} exclusion zones from OpenStreetMap")
                else:
                    print("No exclusion zones found in OpenStreetMap for this area")

            except Exception as e:
                print(f"Warning: Could not fetch automatic exclusions: {str(e)}")
                # Continue without exclusions rather than failing completely
                exclusion_zones_gdf = None

        # Process time if requested
        if include_time:
            if not time_start or not time_end:
                return None, "Error: If time distribution is enabled, both start and end times must be provided"
            try:
                time_start_dt = datetime.datetime.strptime(time_start, "%Y-%m-%d %H:%M:%S")
                time_end_dt = datetime.datetime.strptime(time_end, "%Y-%m-%d %H:%M:%S")
                if time_start_dt >= time_end_dt:
                    return None, "Error: End time must be after start time"
            except ValueError:
                return None, "Error: Invalid time format. Use YYYY-MM-DD HH:MM:SS"
        else:
            time_start_dt = None
            time_end_dt = None

        # Create diffuser and generate diffused points
        diffuser = SpatialDiffuser()
        result_df = diffuser.diffuse_points(
            input_data=df,
            distribution_type=distribution_type,
            global_radius=global_radius,
            time_start=time_start_dt,
            time_end=time_end_dt,
            seed=seed,
            exclusion_zones_gdf=exclusion_zones_gdf
        )

        # Write the result into a fresh per-request temporary directory
        # instead of a fixed path in the CWD, so concurrent users of the
        # Gradio app cannot clobber each other's downloads; the friendly
        # filename is preserved for the browser download.
        import tempfile
        temp_file = os.path.join(tempfile.mkdtemp(), "diffused_points.csv")
        result_df.to_csv(temp_file, index=False)

        # Create visualization with basemap and exclusion zones
        fig = create_visualization(df, result_df, show_basemap, exclusion_zones_gdf)

        return fig, temp_file

    except Exception as e:
        return None, f"Error: {str(e)}"
924
+
925
def create_diffusion_interface():
    """Create Gradio interface for the spatial diffusion tool.

    Builds a two-column layout: input controls (CSV upload, distribution,
    temporal and exclusion-zone options) on the left, and the result plot
    plus download link on the right.  Returns the ``gr.Blocks`` container
    so it can be embedded as a tab in a larger app or launched standalone.
    """

    with gr.Blocks() as diffusion_interface:
        gr.Markdown("## 🗺️ Spatial Diffusion Tool")

        with gr.Row():
            with gr.Column(scale=1):
                # Move description into the left column for better space usage
                gr.Markdown("""
                ### About This Tool
                Transform aggregated geographic points with counts into individual points using spatial diffusion methods.

                **Input CSV Format:**
                - `lat`: Latitude of source point
                - `lon`: Longitude of source point
                - `count`: Number of points to generate
                - `radius`: (Optional) Diffusion radius in meters

                **Distribution Types:**
                - **Uniform**: Equal probability throughout circle
                - **Normal**: Higher density near center
                - **Exponential Decay**: Density decreases from center
                - **Distance-Weighted**: More points at medium distances
                """)

                # Input controls
                input_file = gr.File(label="Input CSV File", file_types=[".csv"])

                # Distribution options grouped together
                gr.Markdown("### 🎯 Distribution Options")
                with gr.Row():
                    distribution = gr.Dropdown(
                        choices=["uniform", "normal", "exponential_decay", "distance_weighted"],
                        value="uniform",
                        label="Distribution Type",
                        scale=2
                    )
                    seed = gr.Textbox(
                        label="Random Seed (optional)",
                        placeholder="e.g. 42",
                        scale=1
                    )

                global_radius = gr.Textbox(
                    label="Global Radius (meters)",
                    placeholder="Only if radius column not in CSV"
                )

                # Temporal controls in distribution section
                with gr.Accordion("⏰ Temporal Distribution (Optional)", open=False):
                    include_time = gr.Checkbox(label="Enable Temporal Distribution", value=False)
                    # Start hidden so the initial UI matches the unchecked
                    # checkbox; the include_time.change handler below toggles
                    # visibility.
                    with gr.Group(visible=False) as time_group:
                        time_start = gr.Textbox(
                            label="Start Time",
                            placeholder="YYYY-MM-DD HH:MM:SS"
                        )
                        time_end = gr.Textbox(
                            label="End Time",
                            placeholder="YYYY-MM-DD HH:MM:SS"
                        )

                # Map and exclusion options grouped together
                gr.Markdown("### 🗺️ Map & Exclusion Options")
                show_basemap = gr.Checkbox(
                    label="Show underlying map (requires internet)",
                    value=False
                )
                gr.Markdown("*Adds geographic context with street/satellite imagery*")

                # Automatic exclusion zones - no default selection
                auto_exclusions = gr.CheckboxGroup(
                    label="Auto-exclude from OpenStreetMap:",
                    choices=["Water bodies", "Parks & green spaces", "Industrial areas", "Major roads"],
                    value=[]  # No default selections
                )

                # Advanced manual exclusion zones
                with gr.Accordion("🔧 Advanced: Custom Exclusion Zones", open=False):
                    exclusion_file = gr.File(
                        label="Upload custom shapefile (optional)",
                        file_types=[".geojson", ".json", ".gpkg", ".shp"]
                    )
                    gr.Markdown("*Overrides automatic exclusions if provided*")

                process_btn = gr.Button(
                    "🎯 Generate Diffused Points",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=2):
                # Give more space to visualization
                plot_output = gr.Plot(
                    label="📍 Spatial Diffusion Visualization",
                    show_label=True
                )

                with gr.Row():
                    with gr.Column(scale=2):
                        file_output = gr.File(label="📥 Download Generated Points")
                    with gr.Column(scale=1):
                        gr.Markdown(
                            """
                            **Legend:**
                            🟣 Source points (sized by count)
                            🟠 Generated points
                            ⭕ Diffusion radius
                            🟦 Water bodies
                            🟢 Parks & green spaces
                            ⬜ Industrial areas
                            🟡 Major roads
                            """
                        )

        # Set up event handlers
        process_btn.click(
            fn=process_csv,
            inputs=[input_file, distribution, global_radius, show_basemap, auto_exclusions, exclusion_file, include_time, time_start, time_end, seed],
            outputs=[plot_output, file_output]
        )

        # Show/hide time inputs based on checkbox
        include_time.change(
            fn=lambda x: gr.update(visible=x),
            inputs=[include_time],
            outputs=[time_group]
        )

    return diffusion_interface
1055
+
1056
+ if __name__ == "__main__":
1057
+ # This allows the module to be run directly for testing
1058
+ app = create_diffusion_interface()
1059
+ app.launch()