File size: 16,418 Bytes
bd27d0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# -*- coding: utf-8 -*-
"""Kernel_density_prototype.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1binQn5KdO6tLQHL8uV6s8pTSdXTiArl4
"""



import numpy as np
import pandas as pd
import os # Import os for file path joining
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
from PIL import Image
import io
import folium
from folium.plugins import HeatMap
from folium import Marker # Import Marker for plotting points
import matplotlib.colors # Import for color conversion
import pandas as pd # Import pandas for DataFrame if needed, though not strictly for this function

import pandas as pd
from scipy.stats import gaussian_kde
import numpy as np
import gradio as gr

# Section 1: Pittsburgh coordinate range

# Approximate latitude/longitude bounding box for the Pittsburgh area.
# The values are rough and may be tuned.
pittsburgh_lat_min = 40.3
pittsburgh_lat_max = 40.6
pittsburgh_lon_min = -80.2
pittsburgh_lon_max = -79.8

# Central reference point, used as the centre of the clustered (e.g. Normal) sample.
pittsburgh_lat, pittsburgh_lon = 40.4406, -79.9959


# Echo the configured bounding box.
print(f"Pittsburgh Latitude Range: {pittsburgh_lat_min} to {pittsburgh_lat_max}")
print(f"Pittsburgh Longitude Range: {pittsburgh_lon_min} to {pittsburgh_lon_max}")

# Organized version #2: Generate and save temporary CSV files


# Number of points requested from each distribution generator below.
num_points = 500

# The bounding box (pittsburgh_lat_min/max, pittsburgh_lon_min/max) and the
# centre point (pittsburgh_lat, pittsburgh_lon) used by the generators are the
# module-level values assigned earlier in this file.


# Function to generate uniformly distributed points
def generate_uniform_points(lat_min, lat_max, lon_min, lon_max, num_points):
    """
    Draws points uniformly at random inside a latitude/longitude rectangle.

    Args:
        lat_min (float): Lower latitude bound.
        lat_max (float): Upper latitude bound.
        lon_min (float): Lower longitude bound.
        lon_max (float): Upper longitude bound.
        num_points (int): Number of points to draw.

    Returns:
        pd.DataFrame: One row per point, with 'latitude' and 'longitude' columns.
    """
    # Dict entries are evaluated top to bottom, so latitudes are drawn from the
    # global NumPy random state before longitudes.
    columns = {
        'latitude': np.random.uniform(low=lat_min, high=lat_max, size=num_points),
        'longitude': np.random.uniform(low=lon_min, high=lon_max, size=num_points),
    }
    return pd.DataFrame(columns)

# Function to generate normally distributed points (clustered around a center)
def generate_normal_points(center_lat, center_lon, lat_std, lon_std, num_points,
                           lat_min=None, lat_max=None, lon_min=None, lon_max=None):
    """
    Draws normally distributed points around a centre and clips them to a bounding box.

    Points falling outside the bounding box are discarded rather than redrawn,
    so the result may contain fewer than ``num_points`` rows.

    Args:
        center_lat (float): Mean latitude of the cluster.
        center_lon (float): Mean longitude of the cluster.
        lat_std (float): Standard deviation of the latitudes.
        lon_std (float): Standard deviation of the longitudes.
        num_points (int): Number of points drawn before clipping.
        lat_min (float, optional): Lower latitude bound. Defaults to the
            module-level ``pittsburgh_lat_min``.
        lat_max (float, optional): Upper latitude bound. Defaults to the
            module-level ``pittsburgh_lat_max``.
        lon_min (float, optional): Lower longitude bound. Defaults to the
            module-level ``pittsburgh_lon_min``.
        lon_max (float, optional): Upper longitude bound. Defaults to the
            module-level ``pittsburgh_lon_max``.

    Returns:
        pd.DataFrame: Surviving points, with 'latitude' and 'longitude' columns.
    """
    # Fall back to the module-level Pittsburgh bounds only for the limits the
    # caller did not supply, so existing five-argument calls behave as before.
    lat_min = pittsburgh_lat_min if lat_min is None else lat_min
    lat_max = pittsburgh_lat_max if lat_max is None else lat_max
    lon_min = pittsburgh_lon_min if lon_min is None else lon_min
    lon_max = pittsburgh_lon_max if lon_max is None else lon_max

    lats = np.random.normal(center_lat, lat_std, num_points)
    lons = np.random.normal(center_lon, lon_std, num_points)

    # Keep only the points inside the (inclusive) bounding box.
    inside = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})


# Function to generate bimodal points (two clusters)
def generate_bimodal_points(center1_lat, center1_lon, center2_lat, center2_lon, lat_std, lon_std, num_points,
                            lat_min=None, lat_max=None, lon_min=None, lon_max=None):
    """
    Draws points from two normal clusters and clips them to a bounding box.

    The first cluster receives ``num_points // 2`` points and the second the
    rest. Points falling outside the bounding box are discarded rather than
    redrawn, so the result may contain fewer than ``num_points`` rows.

    Args:
        center1_lat (float): Mean latitude of the first cluster.
        center1_lon (float): Mean longitude of the first cluster.
        center2_lat (float): Mean latitude of the second cluster.
        center2_lon (float): Mean longitude of the second cluster.
        lat_std (float): Standard deviation of the latitudes (both clusters).
        lon_std (float): Standard deviation of the longitudes (both clusters).
        num_points (int): Total number of points drawn before clipping.
        lat_min (float, optional): Lower latitude bound. Defaults to the
            module-level ``pittsburgh_lat_min``.
        lat_max (float, optional): Upper latitude bound. Defaults to the
            module-level ``pittsburgh_lat_max``.
        lon_min (float, optional): Lower longitude bound. Defaults to the
            module-level ``pittsburgh_lon_min``.
        lon_max (float, optional): Upper longitude bound. Defaults to the
            module-level ``pittsburgh_lon_max``.

    Returns:
        pd.DataFrame: Surviving points, with 'latitude' and 'longitude' columns.
    """
    # Fall back to the module-level Pittsburgh bounds only for the limits the
    # caller did not supply, so existing seven-argument calls behave as before.
    lat_min = pittsburgh_lat_min if lat_min is None else lat_min
    lat_max = pittsburgh_lat_max if lat_max is None else lat_max
    lon_min = pittsburgh_lon_min if lon_min is None else lon_min
    lon_max = pittsburgh_lon_max if lon_max is None else lon_max

    # Split the budget; the second cluster absorbs the odd point, if any.
    first_count = num_points // 2
    second_count = num_points - first_count

    lats1 = np.random.normal(center1_lat, lat_std, first_count)
    lons1 = np.random.normal(center1_lon, lon_std, first_count)
    lats2 = np.random.normal(center2_lat, lat_std, second_count)
    lons2 = np.random.normal(center2_lon, lon_std, second_count)

    lats = np.concatenate([lats1, lats2])
    lons = np.concatenate([lons1, lons2])

    # Keep only the points inside the (inclusive) bounding box.
    inside = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})

# Function to generate points with a Poisson-like distribution (approximated)
# Generating truly spatially random points following a Poisson process within a region is more complex,
# often involving generating a Poisson number of points and then distributing them uniformly.
# A simpler approximation for visualization purposes could be generating clusters with varying densities,
# or using a transformation of uniform points.
# For this example, let's generate points with varying density based on a simple transformation,
# or alternatively, generate several small clusters.
# Let's go with generating several small clusters to simulate a non-uniform, potentially "clumpy" distribution.
def generate_poisson_like_points(lat_min, lat_max, lon_min, lon_max, num_points, num_clusters=10, cluster_std=0.01):
    """
    Generates a "clumpy" point pattern made of several small normal clusters.

    Cluster centres are drawn uniformly inside the bounding box, inset by
    ``cluster_std`` on each side. ``num_points`` is split as evenly as possible
    across the clusters (the first ``num_points % num_clusters`` clusters get
    one extra point), and points that land outside the bounding box are
    discarded, so the result may contain fewer than ``num_points`` rows.

    Args:
        lat_min (float): Lower latitude bound.
        lat_max (float): Upper latitude bound.
        lon_min (float): Lower longitude bound.
        lon_max (float): Upper longitude bound.
        num_points (int): Total number of points drawn before clipping.
        num_clusters (int): Number of clusters; must be at least 1.
        cluster_std (float): Standard deviation of each cluster, applied to
            both latitude and longitude.

    Returns:
        pd.DataFrame: Surviving points, with 'latitude' and 'longitude' columns.

    Raises:
        ValueError: If ``num_clusters`` is smaller than 1.
    """
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1")

    # Spread the remainder over the first clusters instead of dropping it, so
    # the number of drawn points always equals num_points.
    base_count, extra = divmod(num_points, num_clusters)

    # Random cluster centres within the overall range.
    cluster_centers_lat = np.random.uniform(lat_min + cluster_std, lat_max - cluster_std, num_clusters)
    cluster_centers_lon = np.random.uniform(lon_min + cluster_std, lon_max - cluster_std, num_clusters)

    lat_parts = []
    lon_parts = []
    for index, (center_lat, center_lon) in enumerate(zip(cluster_centers_lat, cluster_centers_lon)):
        cluster_size = base_count + (1 if index < extra else 0)
        lat_parts.append(np.random.normal(center_lat, cluster_std, cluster_size))
        lon_parts.append(np.random.normal(center_lon, cluster_std, cluster_size))

    lats = np.concatenate(lat_parts)
    lons = np.concatenate(lon_parts)

    # Keep only the points inside the (inclusive) bounding box.
    inside = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})


# Draw one synthetic sample per distribution. The generators share NumPy's
# global random state, so the call order below determines the samples.
uniform_df = generate_uniform_points(
    pittsburgh_lat_min, pittsburgh_lat_max,
    pittsburgh_lon_min, pittsburgh_lon_max,
    num_points,
)
# Single cluster centred on the Pittsburgh reference point.
normal_df = generate_normal_points(pittsburgh_lat, pittsburgh_lon, 0.05, 0.05, num_points)

# Two cluster centres inside the Pittsburgh range for the bimodal sample.
bimodal_center1_lat, bimodal_center1_lon = 40.4, -80.1
bimodal_center2_lat, bimodal_center2_lon = 40.5, -79.9
bimodal_df = generate_bimodal_points(
    bimodal_center1_lat, bimodal_center1_lon,
    bimodal_center2_lat, bimodal_center2_lon,
    0.03, 0.03,
    num_points,
)
poisson_like_df = generate_poisson_like_points(
    pittsburgh_lat_min, pittsburgh_lat_max,
    pittsburgh_lon_min, pittsburgh_lon_max,
    num_points,
)


# Output directory for the CSV files; created on first run.
csv_dir = "spatial_data"
os.makedirs(csv_dir, exist_ok=True)

# One CSV file per distribution.
uniform_csv_path = os.path.join(csv_dir, "uniform_coords.csv")
normal_csv_path = os.path.join(csv_dir, "normal_coords.csv")
bimodal_csv_path = os.path.join(csv_dir, "bimodal_coords.csv")
poisson_csv_path = os.path.join(csv_dir, "poisson_like_coords.csv")

# Write every sample to disk (without the index column) before reporting.
for _frame, _csv_path in (
    (uniform_df, uniform_csv_path),
    (normal_df, normal_csv_path),
    (bimodal_df, bimodal_csv_path),
    (poisson_like_df, poisson_csv_path),
):
    _frame.to_csv(_csv_path, index=False)
del _frame, _csv_path  # keep the loop temporaries out of the module namespace


print(f"Saved uniform data to {uniform_csv_path}")
print(f"Saved normal data to {normal_csv_path}")
print(f"Saved bimodal data to {bimodal_csv_path}")
print(f"Saved poisson-like data to {poisson_csv_path}")


# Display name -> CSV path; the keys populate the Gradio dropdown.
distribution_files = dict(
    zip(
        ("Uniform", "Normal", "Bimodal", "Poisson-like"),
        (uniform_csv_path, normal_csv_path, bimodal_csv_path, poisson_csv_path),
    )
)

# Organized version #3: Create a function to load data and calculate KDE


def load_data_and_calculate_kde(distribution_name, file_map=None):
    """
    Loads coordinate data for a given distribution name and calculates its KDE.

    Rows whose latitude or longitude is missing or not numeric are dropped
    before the density is estimated, so a partially damaged CSV still loads.
    Failures are reported through the returned error message rather than by
    raising, because the result is consumed directly by the UI callback.

    Args:
        distribution_name (str): The name of the distribution (key in the file mapping).
        file_map (dict, optional): Mapping of distribution name to CSV path.
            Defaults to the module-level ``distribution_files``.

    Returns:
        tuple: A tuple containing:
            - latitudes (np.ndarray): Array of latitudes.
            - longitudes (np.ndarray): Array of longitudes.
            - kde_object (gaussian_kde): The calculated kernel density estimate object.
            - error_message (str or None): An error message if loading fails, otherwise None.
          On failure the first three items are None.
    """
    files = distribution_files if file_map is None else file_map
    file_path = files.get(distribution_name)

    if file_path is None:
        return None, None, None, f"Error: Unknown distribution name '{distribution_name}'"

    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        return None, None, None, f"Error: File not found at '{file_path}'"
    except Exception as e:
        # UI boundary: report any other read/parse failure as a message.
        return None, None, None, f"Error loading data or calculating KDE: {e}"

    if 'latitude' not in df.columns or 'longitude' not in df.columns:
        return None, None, None, f"Error: CSV file '{file_path}' must contain 'latitude' and 'longitude' columns."

    # Coerce to numbers and drop unusable rows; NaN coordinates would otherwise
    # poison the covariance estimate inside gaussian_kde.
    coords = df[['latitude', 'longitude']].apply(pd.to_numeric, errors='coerce').dropna()

    # Two points (or fewer) always give a singular 2-D covariance matrix.
    if len(coords) < 3:
        return None, None, None, (
            f"Error: CSV file '{file_path}' must contain at least 3 valid coordinate rows to calculate a KDE."
        )

    latitudes = coords['latitude'].to_numpy()
    longitudes = coords['longitude'].to_numpy()

    # gaussian_kde expects shape (n_dims, n_points); longitude is x, latitude is y.
    coordinates = np.vstack([longitudes, latitudes])

    try:
        kde_object = gaussian_kde(coordinates)
    except Exception as e:
        # E.g. degenerate (collinear/identical) points give a singular covariance.
        return None, None, None, f"Error loading data or calculating KDE: {e}"

    return latitudes, longitudes, kde_object, None

# Example usage (optional - for testing the function)
# test_distribution = "Uniform"
# test_lats, test_lons, test_kde, error = load_data_and_calculate_kde(test_distribution)
# if error:
#     print(error)
# else:
#     print(f"Successfully loaded data and calculated KDE for {test_distribution}. KDE object: {test_kde}")

# Organized version #4: Create a function to visualize KDE and points


def plot_kde_and_points(min_lat, max_lat, min_lon, max_lon, original_latitudes, original_longitudes, kde_object):
    """
    Generates a static KDE heatmap and an interactive Folium map with points colored by KDE density.

    Args:
        min_lat (float): Minimum latitude for the static heatmap.
        max_lat (float): Maximum latitude for the static heatmap.
        min_lon (float): Minimum longitude for the static heatmap.
        max_lon (float): Maximum longitude for the static heatmap.
        original_latitudes (np.ndarray): Array of original latitudes.
        original_longitudes (np.ndarray): Array of original longitudes.
        kde_object (gaussian_kde): The calculated kernel density estimate object.

    Returns:
        tuple: A tuple containing:
            - pil_image (PIL.Image): The static KDE heatmap image.
            - colored_points_map_html (str): The HTML for the interactive map with colored points.
    """
    pil_image = _render_kde_heatmap(min_lat, max_lat, min_lon, max_lon, kde_object)
    colored_points_map_html = _render_colored_points_map(original_latitudes, original_longitudes, kde_object)
    return pil_image, colored_points_map_html


def _render_kde_heatmap(min_lat, max_lat, min_lon, max_lon, kde_object):
    """Renders the KDE over a 100x100 lon/lat grid as a Matplotlib heatmap and returns it as a PIL image."""
    # Grid of evaluation points; x is longitude, y is latitude (matches the KDE's axis order).
    x, y = np.mgrid[min_lon:max_lon:100j, min_lat:max_lat:100j]
    positions = np.vstack([x.ravel(), y.ravel()])

    # Evaluate the density on the grid and restore the 2-D grid shape.
    z = kde_object(positions).reshape(x.shape)

    # Min-max normalize to [0, 1]; a perfectly flat surface maps to all zeros.
    z_min, z_max = z.min(), z.max()
    z_normalized = (z - z_min) / (z_max - z_min) if z_max > z_min else np.zeros_like(z)

    buf = io.BytesIO()
    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        # Transpose because mgrid puts longitude on axis 0 while imshow expects rows = y.
        im = ax.imshow(z_normalized.T, origin='lower',
                       extent=[min_lon, max_lon, min_lat, max_lat],
                       cmap='hot', aspect='auto')
        fig.colorbar(im, ax=ax, label='Density')
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        ax.set_title('Kernel Density Estimate Heatmap (Static)')

        # Save this specific figure (not whatever pyplot considers "current").
        fig.savefig(buf, format='png', bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)

    buf.seek(0)
    pil_image = Image.open(buf)
    pil_image.load()  # decode now instead of lazily on first access
    return pil_image


def _render_colored_points_map(original_latitudes, original_longitudes, kde_object):
    """Builds a Folium map with one circle marker per point, colored by its KDE density, and returns its HTML."""
    # Density at every original point, computed once and reused for colors and tooltips.
    original_coordinates = np.vstack([original_longitudes, original_latitudes])
    density_at_original_points = kde_object(original_coordinates)

    # Min-max normalize for coloring; the epsilon avoids division by zero when
    # all densities are equal.
    density_min = density_at_original_points.min()
    density_max = density_at_original_points.max()
    density_normalized = (density_at_original_points - density_min) / (density_max - density_min + 1e-9)

    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    colormap = plt.get_cmap('viridis')

    # Center the map on the mean of the points.
    map_center_lat = np.mean(original_latitudes)
    map_center_lon = np.mean(original_longitudes)
    m_colored_points = folium.Map(location=[map_center_lat, map_center_lon], zoom_start=10)

    point_rows = zip(original_latitudes, original_longitudes, density_at_original_points, density_normalized)
    for lat, lon, density, density_norm in point_rows:
        color = matplotlib.colors.rgb2hex(colormap(density_norm))

        folium.CircleMarker(
            location=[lat, lon],
            radius=5,  # Adjust marker size as needed
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            tooltip=f"Density: {density:.4f}"  # raw (un-normalized) density
        ).add_to(m_colored_points)

    # Folium's notebook representation doubles as an embeddable HTML string.
    return m_colored_points._repr_html_()

# Organized version #5: Update the Gradio interface



# The plot_kde_and_points function is defined in a previous cell
# The load_data_and_calculate_kde function is defined in a previous cell
# The distribution_files dictionary is defined in a previous cell
# The pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max are defined in a previous cell

# Define the main function that will be called by Gradio
def update_visualization(distribution_name):
    """
    Gradio callback: builds both visualizations for the chosen distribution.

    Args:
        distribution_name (str): The name of the selected distribution.

    Returns:
        tuple: A tuple containing:
            - pil_image (PIL.Image or None): The static KDE heatmap image; None on error.
            - colored_points_map_html (str): The HTML for the interactive map; empty on error.
            - error_message (str): The loader's error message, or an empty string on success.
    """
    loaded = load_data_and_calculate_kde(distribution_name)
    point_lats, point_lons, density_model, load_error = loaded

    # Guard clause: on a loader failure emit placeholder outputs plus the message.
    if load_error:
        return None, "", load_error

    # The heatmap always spans the predefined Pittsburgh bounding box.
    heatmap_image, points_map_html = plot_kde_and_points(
        min_lat=pittsburgh_lat_min,
        max_lat=pittsburgh_lat_max,
        min_lon=pittsburgh_lon_min,
        max_lon=pittsburgh_lon_max,
        original_latitudes=point_lats,
        original_longitudes=point_lons,
        kde_object=density_model,
    )

    # Success: both visualizations and an empty error message.
    return heatmap_image, points_map_html, ""


# Dropdown entries are the display names registered in distribution_files.
distribution_choices = list(distribution_files)

# Single-input interface: choosing a distribution triggers update_visualization,
# whose three return values map onto the three outputs below, in order.
iface = gr.Interface(
    fn=update_visualization,
    inputs=[
        gr.Dropdown(choices=distribution_choices, label="Select Distribution", value=distribution_choices[0]),
    ],
    outputs=[
        gr.Image(label="Static Kernel Density Map (Matplotlib)"),
        gr.HTML(label="Interactive Points Map Colored by KDE (Folium)"),
        # Left visible: with visible=False the error text returned by
        # update_visualization was never shown to the user.
        gr.Textbox(label="Error Message"),
    ],
    title="Kernel Density Estimation of Different Spatial Distributions around Pittsburgh",
    description="Select a spatial distribution from the dropdown to visualize its kernel density and point distribution around Pittsburgh."
)

# Launch the Gradio interface.
# NOTE(review): share=True requests a public share link — confirm this is
# wanted in the deployment environment.
iface.launch(share=True)

"""Here is the content for your `requirements.txt` file:"""