Spaces:

ddecosmo
/

kernel_density_test

Sleeping

App Files Files Community

ddecosmo committed on Sep 28, 2025

Commit

bd27d0b

verified ·

1 Parent(s): a10abe2

Upload 2 files

Browse files

Files changed (2) hide show

kernel_density_prototype.py +381 -0
requirements.txt +7 -0

kernel_density_prototype.py ADDED Viewed

	@@ -0,0 +1,381 @@

+# -*- coding: utf-8 -*-
+"""Kernel_density_prototype.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1binQn5KdO6tLQHL8uV6s8pTSdXTiArl4
+"""
+import numpy as np
+import pandas as pd
+import os # Import os for file path joining
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+import numpy as np
+from PIL import Image
+import io
+import folium
+from folium.plugins import HeatMap
+from folium import Marker # Import Marker for plotting points
+import matplotlib.colors # Import for color conversion
+import pandas as pd # Import pandas for DataFrame if needed, though not strictly for this function
+import pandas as pd
+from scipy.stats import gaussian_kde
+import numpy as np
+import gradio as gr
+# Organized version #1: Define Pittsburgh Coordinate Range
+# Define the latitude and longitude boundaries for the Pittsburgh area
+# These are approximate bounds and can be adjusted
+pittsburgh_lat_min, pittsburgh_lat_max = 40.3, 40.6
+pittsburgh_lon_min, pittsburgh_lon_max = -80.2, -79.8
+# Define a central point for generating some distributions (e.g., Normal)
+pittsburgh_lat = 40.4406 # Example center latitude
+pittsburgh_lon = -79.9959 # Example center longitude
+print(f"Pittsburgh Latitude Range: {pittsburgh_lat_min} to {pittsburgh_lat_max}")
+print(f"Pittsburgh Longitude Range: {pittsburgh_lon_min} to {pittsburgh_lon_max}")
+# Organized version #2: Generate and save temporary CSV files
+# Define the number of points for each distribution
+num_points = 500
+# Define the Pittsburgh coordinate range (assuming these are defined in a previous cell)
+# If not, uncomment and define them here or ensure the previous cell is run first.
+# pittsburgh_lat_min, pittsburgh_lat_max = 40.3, 40.6
+# pittsburgh_lon_min, pittsburgh_lon_max = -80.2, -79.8
+# pittsburgh_lat = 40.4406 # Example center
+# pittsburgh_lon = -79.9959 # Example center
+# Function to generate uniformly distributed points
+def generate_uniform_points(lat_min, lat_max, lon_min, lon_max, num_points):
+    lats = np.random.uniform(lat_min, lat_max, num_points)
+    lons = np.random.uniform(lon_min, lon_max, num_points)
+    return pd.DataFrame({'latitude': lats, 'longitude': lons})
+# Function to generate normally distributed points (clustered around a center)
+def generate_normal_points(center_lat, center_lon, lat_std, lon_std, num_points):
+    lats = np.random.normal(center_lat, lat_std, num_points)
+    lons = np.random.normal(center_lon, lon_std, num_points)
+     # Filter to keep points within the original range after adding noise (optional but good)
+    valid_indices = (lats >= pittsburgh_lat_min) & (lats <= pittsburgh_lat_max) & (lons >= pittsburgh_lon_min) & (lons <= pittsburgh_lon_max)
+    lats = lats[valid_indices]
+    lons = lons[valid_indices]
+    # If after filtering we have significantly less points than requested, we might need to regenerate
+    # For simplicity here, we'll just use the filtered points.
+    return pd.DataFrame({'latitude': lats, 'longitude': lons})
+# Function to generate bimodal points (two clusters)
+def generate_bimodal_points(center1_lat, center1_lon, center2_lat, center2_lon, lat_std, lon_std, num_points):
+    # Generate half the points around the first center
+    num_points_half = num_points // 2
+    lats1 = np.random.normal(center1_lat, lat_std, num_points_half)
+    lons1 = np.random.normal(center1_lon, lon_std, num_points_half)
+    # Generate the other half around the second center
+    lats2 = np.random.normal(center2_lat, lat_std, num_points - num_points_half)
+    lons2 = np.random.normal(center2_lon, lon_std, num_points - num_points_half)
+    # Combine the points
+    lats = np.concatenate([lats1, lats2])
+    lons = np.concatenate([lons1, lons2])
+    # Filter to keep points within the original range
+    valid_indices = (lats >= pittsburgh_lat_min) & (lats <= pittsburgh_lat_max) & (lons >= pittsburgh_lon_min) & (lons <= pittsburgh_lon_max)
+    lats = lats[valid_indices]
+    lons = lons[valid_indices]
+    return pd.DataFrame({'latitude': lats, 'longitude': lons})
+# Function to generate points with a Poisson-like distribution (approximated)
+# Generating truly spatially random points following a Poisson process within a region is more complex,
+# often involving generating a Poisson number of points and then distributing them uniformly.
+# A simpler approximation for visualization purposes could be generating clusters with varying densities,
+# or using a transformation of uniform points.
+# For this example, let's generate points with varying density based on a simple transformation,
+# or alternatively, generate several small clusters.
+# Let's go with generating several small clusters to simulate a non-uniform, potentially "clumpy" distribution.
+def generate_poisson_like_points(lat_min, lat_max, lon_min, lon_max, num_points, num_clusters=10, cluster_std=0.01):
+    all_lats = []
+    all_lons = []
+    points_per_cluster = num_points // num_clusters
+    # Generate random centers for the clusters within the overall range
+    cluster_centers_lat = np.random.uniform(lat_min + cluster_std, lat_max - cluster_std, num_clusters)
+    cluster_centers_lon = np.random.uniform(lon_min + cluster_std, lon_max - cluster_std, num_clusters)
+    for i in range(num_clusters):
+        lats = np.random.normal(cluster_centers_lat[i], cluster_std, points_per_cluster)
+        lons = np.random.normal(cluster_centers_lon[i], cluster_std, points_per_cluster)
+        all_lats.extend(lats)
+        all_lons.extend(lons)
+    lats = np.array(all_lats)
+    lons = np.array(all_lons)
+    # Filter to keep points within the original range
+    valid_indices = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
+    lats = lats[valid_indices]
+    lons = lons[valid_indices]
+    return pd.DataFrame({'latitude': lats, 'longitude': lons})
+# Generate data for different distributions
+uniform_df = generate_uniform_points(pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max, num_points)
+normal_df = generate_normal_points(pittsburgh_lat, pittsburgh_lon, 0.05, 0.05, num_points) # Using the original pittsburgh_lat/lon as center
+# Define centers for bimodal distribution within the Pittsburgh range
+bimodal_center1_lat, bimodal_center1_lon = 40.4, -80.1
+bimodal_center2_lat, bimodal_center2_lon = 40.5, -79.9
+bimodal_df = generate_bimodal_points(bimodal_center1_lat, bimodal_center1_lon, bimodal_center2_lat, bimodal_center2_lon, 0.03, 0.03, num_points)
+poisson_like_df = generate_poisson_like_points(pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max, num_points)
+# Define directory to save CSVs
+csv_dir = "spatial_data"
+os.makedirs(csv_dir, exist_ok=True) # Create the directory if it doesn't exist
+# Define file paths
+uniform_csv_path = os.path.join(csv_dir, "uniform_coords.csv")
+normal_csv_path = os.path.join(csv_dir, "normal_coords.csv")
+bimodal_csv_path = os.path.join(csv_dir, "bimodal_coords.csv")
+poisson_csv_path = os.path.join(csv_dir, "poisson_like_coords.csv")
+# Save dataframes to CSV files
+uniform_df.to_csv(uniform_csv_path, index=False)
+normal_df.to_csv(normal_csv_path, index=False)
+bimodal_df.to_csv(bimodal_csv_path, index=False)
+poisson_like_df.to_csv(poisson_csv_path, index=False)
+print(f"Saved uniform data to {uniform_csv_path}")
+print(f"Saved normal data to {normal_csv_path}")
+print(f"Saved bimodal data to {bimodal_csv_path}")
+print(f"Saved poisson-like data to {poisson_csv_path}")
+# Store the file paths and distribution names for the Gradio dropdown
+distribution_files = {
+    "Uniform": uniform_csv_path,
+    "Normal": normal_csv_path,
+    "Bimodal": bimodal_csv_path,
+    "Poisson-like": poisson_csv_path
+}
+# Organized version #3: Create a function to load data and calculate KDE
+def load_data_and_calculate_kde(distribution_name):
+    """
+    Loads coordinate data for a given distribution name and calculates its KDE.
+    Args:
+        distribution_name (str): The name of the distribution (key in distribution_files).
+    Returns:
+        tuple: A tuple containing:
+            - latitudes (np.ndarray): Array of latitudes.
+            - longitudes (np.ndarray): Array of longitudes.
+            - kde_object (gaussian_kde): The calculated kernel density estimate object.
+            - error_message (str or None): An error message if loading fails, otherwise None.
+    """
+    file_path = distribution_files.get(distribution_name)
+    if file_path is None:
+        return None, None, None, f"Error: Unknown distribution name '{distribution_name}'"
+    try:
+        df = pd.read_csv(file_path)
+        if 'latitude' not in df.columns or 'longitude' not in df.columns:
+             return None, None, None, f"Error: CSV file '{file_path}' must contain 'latitude' and 'longitude' columns."
+        latitudes = df['latitude'].values
+        longitudes = df['longitude'].values
+        # Combine coordinates into a 2D array for KDE
+        coordinates = np.vstack([longitudes, latitudes])
+        # Compute the kernel density estimate
+        kde_object = gaussian_kde(coordinates)
+        return latitudes, longitudes, kde_object, None
+    except FileNotFoundError:
+        return None, None, None, f"Error: File not found at '{file_path}'"
+    except Exception as e:
+        return None, None, None, f"Error loading data or calculating KDE: {e}"
+# Example usage (optional - for testing the function)
+# test_distribution = "Uniform"
+# test_lats, test_lons, test_kde, error = load_data_and_calculate_kde(test_distribution)
+# if error:
+#     print(error)
+# else:
+#     print(f"Successfully loaded data and calculated KDE for {test_distribution}. KDE object: {test_kde}")
+# Organized version #4: Create a function to visualize KDE and points
+def plot_kde_and_points(min_lat, max_lat, min_lon, max_lon, original_latitudes, original_longitudes, kde_object):
+    """
+    Generates a static KDE heatmap and an interactive Folium map with points colored by KDE density.
+    Args:
+        min_lat (float): Minimum latitude for the static heatmap.
+        max_lat (float): Maximum latitude for the static heatmap.
+        min_lon (float): Minimum longitude for the static heatmap.
+        max_lon (float): Maximum longitude for the static heatmap.
+        original_latitudes (np.ndarray): Array of original latitudes.
+        original_longitudes (np.ndarray): Array of original longitudes.
+        kde_object (gaussian_kde): The calculated kernel density estimate object.
+    Returns:
+        tuple: A tuple containing:
+            - pil_image (PIL.Image): The static KDE heatmap image.
+            - colored_points_map_html (str): The HTML for the interactive map with colored points.
+    """
+    # --- Matplotlib Static Heatmap ---
+    # Create a grid of points within the specified latitude and longitude range
+    x, y = np.mgrid[min_lon:max_lon:100j, min_lat:max_lat:100j]
+    positions = np.vstack([x.ravel(), y.ravel()])
+    # Evaluate the kernel density estimate at each point in the grid
+    z = kde_object(positions)
+    # Reshape the density values into a 2D array corresponding to the grid
+    z = z.reshape(x.shape)
+    # Normalize the density values to the range [0, 1] for consistent colormap application
+    z_normalized = (z - z.min()) / (z.max() - z.min()) if z.max() > z.min() else np.zeros_like(z)
+    fig, ax = plt.subplots(figsize=(8, 8))
+    im = ax.imshow(z_normalized.T, origin='lower',
+                   extent=[min_lon, max_lon, min_lat, max_lat],
+                   cmap='hot', aspect='auto')
+    fig.colorbar(im, ax=ax, label='Density')
+    ax.set_xlabel('Longitude')
+    ax.set_ylabel('Latitude')
+    ax.set_title('Kernel Density Estimate Heatmap (Static)')
+    # To return a PIL Image for Gradio, save the plot to a buffer
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png', bbox_inches='tight')
+    buf.seek(0)
+    pil_image = Image.open(buf)
+    plt.close(fig) # Close the plot to free up memory
+    # --- Folium Interactive Map with Colored Points ---
+    # Calculate density at the original points
+    original_coordinates = np.vstack([original_longitudes, original_latitudes])
+    density_at_original_points = kde_object(original_coordinates)
+    # Normalize density values for coloring
+    # Add a small epsilon to avoid division by zero if all densities are the same
+    density_normalized = (density_at_original_points - density_at_original_points.min()) / (density_at_original_points.max() - density_at_original_points.min() + 1e-9)
+    # Choose a colormap (e.g., 'viridis', 'hot', 'plasma')
+    colormap = cm.get_cmap('viridis')
+    # Create Folium map centered around the mean of the points
+    map_center_lat = np.mean(original_latitudes)
+    map_center_lon = np.mean(original_longitudes)
+    m_colored_points = folium.Map(location=[map_center_lat, map_center_lon], zoom_start=10)
+    # Add colored circle markers for each point
+    for lat, lon, density_norm in zip(original_latitudes, original_longitudes, density_normalized):
+        # Get color from colormap based on normalized density
+        color = matplotlib.colors.rgb2hex(colormap(density_norm))
+        # Add a circle marker with the determined color
+        folium.CircleMarker(
+            location=[lat, lon],
+            radius=5, # Adjust marker size as needed
+            color=color,
+            fill=True,
+            fill_color=color,
+            fill_opacity=0.7,
+            tooltip=f"Density: {kde_object([lon, lat])[0]:.4f}" # Add density as a tooltip
+        ).add_to(m_colored_points)
+    # Save the colored points map to an HTML string
+    colored_points_map_html = m_colored_points._repr_html_()
+    return pil_image, colored_points_map_html # Return both the static heatmap image and the colored points map HTML
+# Organized version #5: Update the Gradio interface
+# The plot_kde_and_points function is defined in a previous cell
+# The load_data_and_calculate_kde function is defined in a previous cell
+# The distribution_files dictionary is defined in a previous cell
+# The pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max are defined in a previous cell
+# Define the main function that will be called by Gradio
+def update_visualization(distribution_name):
+    """
+    Loads data for the selected distribution, calculates KDE, and generates visualizations.
+    Args:
+        distribution_name (str): The name of the selected distribution.
+    Returns:
+        tuple: A tuple containing:
+            - pil_image (PIL.Image): The static KDE heatmap image.
+            - colored_points_map_html (str): The HTML for the interactive map with colored points.
+            - error_message (str): An error message if data loading fails, otherwise empty string.
+    """
+    latitudes, longitudes, kde_object, error = load_data_and_calculate_kde(distribution_name)
+    if error:
+        # Return empty or placeholder outputs and the error message
+        return None, "", error
+    # Use the modified visualization function that accepts latitudes, longitudes, and kde_object
+    # Pass the predefined Pittsburgh coordinates
+    pil_image, colored_points_map_html = plot_kde_and_points(
+        pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max,
+        latitudes, longitudes, kde_object
+    )
+    return pil_image, colored_points_map_html, "" # Return visualizations and empty error message
+# Get the list of distribution names for the dropdown
+distribution_choices = list(distribution_files.keys())
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=update_visualization,
+    inputs=[
+        gr.Dropdown(choices=distribution_choices, label="Select Distribution", value=distribution_choices[0]),
+        # Removed the number inputs for latitude and longitude range
+    ],
+    outputs=[
+        gr.Image(label="Static Kernel Density Map (Matplotlib)"),
+        gr.HTML(label="Interactive Points Map Colored by KDE (Folium)"),
+        gr.Textbox(label="Error Message", visible=False) # Add a textbox to display errors
+    ],
+    title="Kernel Density Estimation of Different Spatial Distributions around Pittsburgh",
+    description="Select a spatial distribution from the dropdown to visualize its kernel density and point distribution around Pittsburgh."
+)
+# Launch the Gradio interface
+iface.launch(share=True)
+"""Here is the content for your `requirements.txt` file:"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+numpy
+pandas
+matplotlib
+pillow
+scipy
+folium
+gradio