ddecosmo committed on
Commit
bd27d0b
·
verified ·
1 Parent(s): a10abe2

Upload 2 files

Browse files
Files changed (2) hide show
  1. kernel_density_prototype.py +381 -0
  2. requirements.txt +7 -0
kernel_density_prototype.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Kernel_density_prototype.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1binQn5KdO6tLQHL8uV6s8pTSdXTiArl4
8
+ """
9
+
10
+
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ import os # Import os for file path joining
15
+ import matplotlib.pyplot as plt
16
+ import matplotlib.cm as cm
17
+ import numpy as np
18
+ from PIL import Image
19
+ import io
20
+ import folium
21
+ from folium.plugins import HeatMap
22
+ from folium import Marker # Import Marker for plotting points
23
+ import matplotlib.colors # Import for color conversion
24
+ import pandas as pd # Import pandas for DataFrame if needed, though not strictly for this function
25
+
26
+ import pandas as pd
27
+ from scipy.stats import gaussian_kde
28
+ import numpy as np
29
+ import gradio as gr
30
+
31
# Organized version #1: Define Pittsburgh Coordinate Range

# Approximate bounding box of the Pittsburgh area, in decimal degrees.
# These limits are rough and can be tuned.
pittsburgh_lat_min = 40.3
pittsburgh_lat_max = 40.6
pittsburgh_lon_min = -80.2
pittsburgh_lon_max = -79.8

# Reference point used as the centre of clustered (e.g. Normal) samples.
pittsburgh_lat = 40.4406
pittsburgh_lon = -79.9959

print(f"Pittsburgh Latitude Range: {pittsburgh_lat_min} to {pittsburgh_lat_max}")
print(f"Pittsburgh Longitude Range: {pittsburgh_lon_min} to {pittsburgh_lon_max}")

# Organized version #2: Generate and save temporary CSV files

# Number of points requested for every synthetic distribution.
num_points = 500

# NOTE: the generators below rely on the bounding box and centre defined
# above; keep those definitions ahead of this section.
58
+
59
+
60
+ # Function to generate uniformly distributed points
61
def generate_uniform_points(lat_min, lat_max, lon_min, lon_max, num_points):
    """
    Draws points uniformly at random inside a latitude/longitude rectangle.

    Args:
        lat_min (float): Lower latitude bound.
        lat_max (float): Upper latitude bound.
        lon_min (float): Lower longitude bound.
        lon_max (float): Upper longitude bound.
        num_points (int): Number of points to draw.

    Returns:
        pd.DataFrame: Columns 'latitude' and 'longitude', one row per point.
    """
    # Dict entries are evaluated in order, so latitudes are drawn before
    # longitudes; a seeded RNG therefore reproduces the same sample.
    sample = {
        'latitude': np.random.uniform(low=lat_min, high=lat_max, size=num_points),
        'longitude': np.random.uniform(low=lon_min, high=lon_max, size=num_points),
    }
    return pd.DataFrame(sample)
65
+
66
+ # Function to generate normally distributed points (clustered around a center)
67
def generate_normal_points(center_lat, center_lon, lat_std, lon_std, num_points,
                           lat_min=None, lat_max=None, lon_min=None, lon_max=None):
    """
    Draws normally distributed points around a centre and keeps those inside a bounding box.

    Args:
        center_lat (float): Mean latitude of the cluster.
        center_lon (float): Mean longitude of the cluster.
        lat_std (float): Standard deviation of the latitudes.
        lon_std (float): Standard deviation of the longitudes.
        num_points (int): Number of points drawn before filtering.
        lat_min (float, optional): Lower latitude bound. Defaults to the
            module-level pittsburgh_lat_min.
        lat_max (float, optional): Upper latitude bound. Defaults to the
            module-level pittsburgh_lat_max.
        lon_min (float, optional): Lower longitude bound. Defaults to the
            module-level pittsburgh_lon_min.
        lon_max (float, optional): Upper longitude bound. Defaults to the
            module-level pittsburgh_lon_max.

    Returns:
        pd.DataFrame: Columns 'latitude' and 'longitude'. Points falling
        outside the box are discarded and not redrawn, so the frame may hold
        fewer than num_points rows.
    """
    # Any bound left unset falls back to the Pittsburgh bounding box, which
    # keeps existing five-argument calls behaving exactly as before.
    if lat_min is None:
        lat_min = pittsburgh_lat_min
    if lat_max is None:
        lat_max = pittsburgh_lat_max
    if lon_min is None:
        lon_min = pittsburgh_lon_min
    if lon_max is None:
        lon_max = pittsburgh_lon_max

    lats = np.random.normal(center_lat, lat_std, num_points)
    lons = np.random.normal(center_lon, lon_std, num_points)

    # Boolean mask of the points that lie inside the (inclusive) box.
    inside = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})
77
+
78
+
79
+ # Function to generate bimodal points (two clusters)
80
def generate_bimodal_points(center1_lat, center1_lon, center2_lat, center2_lon, lat_std, lon_std, num_points,
                            lat_min=None, lat_max=None, lon_min=None, lon_max=None):
    """
    Draws points from two normal clusters and keeps those inside a bounding box.

    Args:
        center1_lat (float): Mean latitude of the first cluster.
        center1_lon (float): Mean longitude of the first cluster.
        center2_lat (float): Mean latitude of the second cluster.
        center2_lon (float): Mean longitude of the second cluster.
        lat_std (float): Latitude standard deviation shared by both clusters.
        lon_std (float): Longitude standard deviation shared by both clusters.
        num_points (int): Total number of points drawn before filtering.
        lat_min (float, optional): Lower latitude bound. Defaults to the
            module-level pittsburgh_lat_min.
        lat_max (float, optional): Upper latitude bound. Defaults to the
            module-level pittsburgh_lat_max.
        lon_min (float, optional): Lower longitude bound. Defaults to the
            module-level pittsburgh_lon_min.
        lon_max (float, optional): Upper longitude bound. Defaults to the
            module-level pittsburgh_lon_max.

    Returns:
        pd.DataFrame: Columns 'latitude' and 'longitude'. Points falling
        outside the box are discarded, so the frame may hold fewer than
        num_points rows.
    """
    # Any bound left unset falls back to the Pittsburgh bounding box, which
    # keeps existing seven-argument calls behaving exactly as before.
    if lat_min is None:
        lat_min = pittsburgh_lat_min
    if lat_max is None:
        lat_max = pittsburgh_lat_max
    if lon_min is None:
        lon_min = pittsburgh_lon_min
    if lon_max is None:
        lon_max = pittsburgh_lon_max

    # The first cluster gets the floor half; the second takes the rest so an
    # odd num_points is still fully used.
    first_count = num_points // 2
    second_count = num_points - first_count

    lats1 = np.random.normal(center1_lat, lat_std, first_count)
    lons1 = np.random.normal(center1_lon, lon_std, first_count)
    lats2 = np.random.normal(center2_lat, lat_std, second_count)
    lons2 = np.random.normal(center2_lon, lon_std, second_count)

    lats = np.concatenate([lats1, lats2])
    lons = np.concatenate([lons1, lons2])

    # Boolean mask of the points that lie inside the (inclusive) box.
    inside = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})
100
+
101
+ # Function to generate points with a Poisson-like distribution (approximated)
102
+ # Generating truly spatially random points following a Poisson process within a region is more complex,
103
+ # often involving generating a Poisson number of points and then distributing them uniformly.
104
+ # A simpler approximation for visualization purposes could be generating clusters with varying densities,
105
+ # or using a transformation of uniform points.
106
+ # For this example, let's generate points with varying density based on a simple transformation,
107
+ # or alternatively, generate several small clusters.
108
+ # Let's go with generating several small clusters to simulate a non-uniform, potentially "clumpy" distribution.
109
def generate_poisson_like_points(lat_min, lat_max, lon_min, lon_max, num_points, num_clusters=10, cluster_std=0.01):
    """
    Generates a "clumpy" point pattern made of several small normal clusters.

    Cluster centres are drawn uniformly inside the bounding box (inset by
    cluster_std on each side); the requested points are then split as evenly
    as possible between the clusters.

    Args:
        lat_min (float): Lower latitude bound.
        lat_max (float): Upper latitude bound.
        lon_min (float): Lower longitude bound.
        lon_max (float): Upper longitude bound.
        num_points (int): Total number of points drawn before filtering.
        num_clusters (int): Number of clusters to generate. Must be >= 1.
        cluster_std (float): Standard deviation of each cluster, used for
            both latitude and longitude.

    Returns:
        pd.DataFrame: Columns 'latitude' and 'longitude'. Points falling
        outside the box are discarded, so the frame may hold fewer than
        num_points rows.

    Raises:
        ValueError: If num_clusters is smaller than 1.
    """
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1")

    # Spread the remainder over the first clusters instead of dropping it, so
    # all num_points are drawn even when the split is uneven.
    base_size, leftover = divmod(num_points, num_clusters)
    cluster_sizes = [base_size + 1 if i < leftover else base_size for i in range(num_clusters)]

    # Random cluster centres within the overall range.
    cluster_centers_lat = np.random.uniform(lat_min + cluster_std, lat_max - cluster_std, num_clusters)
    cluster_centers_lon = np.random.uniform(lon_min + cluster_std, lon_max - cluster_std, num_clusters)

    lat_parts = []
    lon_parts = []
    for center_lat, center_lon, size in zip(cluster_centers_lat, cluster_centers_lon, cluster_sizes):
        lat_parts.append(np.random.normal(center_lat, cluster_std, size))
        lon_parts.append(np.random.normal(center_lon, cluster_std, size))

    lats = np.concatenate(lat_parts)
    lons = np.concatenate(lon_parts)

    # Boolean mask of the points that lie inside the (inclusive) box.
    inside = (lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max)
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})
135
+
136
+
137
# Build one sample per distribution. The generators are called in a fixed
# order (uniform, normal, bimodal, Poisson-like) because they share NumPy's
# global random state.
uniform_df = generate_uniform_points(
    pittsburgh_lat_min, pittsburgh_lat_max,
    pittsburgh_lon_min, pittsburgh_lon_max,
    num_points,
)
# The normal sample is centred on the Pittsburgh reference point.
normal_df = generate_normal_points(pittsburgh_lat, pittsburgh_lon, 0.05, 0.05, num_points)

# Two cluster centres inside the Pittsburgh range for the bimodal sample.
bimodal_center1_lat, bimodal_center1_lon = 40.4, -80.1
bimodal_center2_lat, bimodal_center2_lon = 40.5, -79.9
bimodal_df = generate_bimodal_points(
    bimodal_center1_lat, bimodal_center1_lon,
    bimodal_center2_lat, bimodal_center2_lon,
    0.03, 0.03, num_points,
)
poisson_like_df = generate_poisson_like_points(
    pittsburgh_lat_min, pittsburgh_lat_max,
    pittsburgh_lon_min, pittsburgh_lon_max,
    num_points,
)

# Output directory for the CSV files; created on first run.
csv_dir = "spatial_data"
os.makedirs(csv_dir, exist_ok=True)

# One CSV path per distribution.
uniform_csv_path = os.path.join(csv_dir, "uniform_coords.csv")
normal_csv_path = os.path.join(csv_dir, "normal_coords.csv")
bimodal_csv_path = os.path.join(csv_dir, "bimodal_coords.csv")
poisson_csv_path = os.path.join(csv_dir, "poisson_like_coords.csv")

# Write every sample to disk without the index column.
for _frame, _target in (
    (uniform_df, uniform_csv_path),
    (normal_df, normal_csv_path),
    (bimodal_df, bimodal_csv_path),
    (poisson_like_df, poisson_csv_path),
):
    _frame.to_csv(_target, index=False)

print(f"Saved uniform data to {uniform_csv_path}")
print(f"Saved normal data to {normal_csv_path}")
print(f"Saved bimodal data to {bimodal_csv_path}")
print(f"Saved poisson-like data to {poisson_csv_path}")

# Display name -> CSV path; the keys populate the Gradio dropdown.
distribution_files = {
    "Uniform": uniform_csv_path,
    "Normal": normal_csv_path,
    "Bimodal": bimodal_csv_path,
    "Poisson-like": poisson_csv_path,
}
177
+
178
+ # Organized version #3: Create a function to load data and calculate KDE
179
+
180
+
181
def load_data_and_calculate_kde(distribution_name):
    """
    Loads coordinate data for a given distribution name and calculates its KDE.

    Rows with a missing latitude or longitude are ignored. Failures are
    reported through the returned error message rather than raised.

    Args:
        distribution_name (str): The name of the distribution (key in distribution_files).

    Returns:
        tuple: A tuple containing:
            - latitudes (np.ndarray): Array of latitudes (None on error).
            - longitudes (np.ndarray): Array of longitudes (None on error).
            - kde_object (gaussian_kde): The calculated kernel density estimate object (None on error).
            - error_message (str or None): An error message if loading fails, otherwise None.
    """
    file_path = distribution_files.get(distribution_name)

    if file_path is None:
        return None, None, None, f"Error: Unknown distribution name '{distribution_name}'"

    try:
        df = pd.read_csv(file_path)
        if 'latitude' not in df.columns or 'longitude' not in df.columns:
            return None, None, None, f"Error: CSV file '{file_path}' must contain 'latitude' and 'longitude' columns."

        # A single NaN coordinate would otherwise make the whole KDE fail,
        # so incomplete rows are dropped up front.
        df = df.dropna(subset=['latitude', 'longitude'])

        # With fewer than three points the 2D sample covariance is singular,
        # so report that explicitly instead of a SciPy linear-algebra error.
        if len(df) < 3:
            return None, None, None, (
                f"Error: CSV file '{file_path}' has too few valid points "
                f"({len(df)}) to estimate a 2D density."
            )

        latitudes = df['latitude'].values
        longitudes = df['longitude'].values

        # gaussian_kde expects shape (n_dims, n_points); the row order
        # (longitude, latitude) must match how the KDE is evaluated later.
        coordinates = np.vstack([longitudes, latitudes])

        # Compute the kernel density estimate
        kde_object = gaussian_kde(coordinates)

        return latitudes, longitudes, kde_object, None

    except FileNotFoundError:
        return None, None, None, f"Error: File not found at '{file_path}'"
    except Exception as e:
        # Deliberately broad: this function is the boundary that converts
        # any parsing or KDE failure into a message for the UI.
        return None, None, None, f"Error loading data or calculating KDE: {e}"
220
+
221
+ # Example usage (optional - for testing the function)
222
+ # test_distribution = "Uniform"
223
+ # test_lats, test_lons, test_kde, error = load_data_and_calculate_kde(test_distribution)
224
+ # if error:
225
+ # print(error)
226
+ # else:
227
+ # print(f"Successfully loaded data and calculated KDE for {test_distribution}. KDE object: {test_kde}")
228
+
229
+ # Organized version #4: Create a function to visualize KDE and points
230
+
231
+
232
def plot_kde_and_points(min_lat, max_lat, min_lon, max_lon, original_latitudes, original_longitudes, kde_object):
    """
    Generates a static KDE heatmap and an interactive Folium map with points colored by KDE density.

    Args:
        min_lat (float): Minimum latitude for the static heatmap.
        max_lat (float): Maximum latitude for the static heatmap.
        min_lon (float): Minimum longitude for the static heatmap.
        max_lon (float): Maximum longitude for the static heatmap.
        original_latitudes (np.ndarray): Array of original latitudes.
        original_longitudes (np.ndarray): Array of original longitudes.
        kde_object (gaussian_kde): The calculated kernel density estimate object,
            evaluated here on (longitude, latitude) pairs.

    Returns:
        tuple: A tuple containing:
            - pil_image (PIL.Image): The static KDE heatmap image.
            - colored_points_map_html (str): The HTML for the interactive map with colored points.
    """

    # --- Matplotlib Static Heatmap ---
    # 100 x 100 evaluation grid over the requested range (x = lon, y = lat).
    x, y = np.mgrid[min_lon:max_lon:100j, min_lat:max_lat:100j]
    positions = np.vstack([x.ravel(), y.ravel()])

    # Evaluate the KDE on the grid and restore the grid shape.
    z = kde_object(positions).reshape(x.shape)

    # Scale to [0, 1] for the colormap; a flat surface maps to all zeros.
    z_span = z.max() - z.min()
    z_normalized = (z - z.min()) / z_span if z_span > 0 else np.zeros_like(z)

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        # z is indexed [lon, lat]; imshow expects [row = lat, col = lon],
        # hence the transpose together with origin='lower'.
        im = ax.imshow(z_normalized.T, origin='lower',
                       extent=[min_lon, max_lon, min_lat, max_lat],
                       cmap='hot', aspect='auto')
        fig.colorbar(im, ax=ax, label='Density')
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        ax.set_title('Kernel Density Estimate Heatmap (Static)')

        # Render into an in-memory PNG so it can be returned as a PIL image.
        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    buf.seek(0)
    pil_image = Image.open(buf)

    # --- Folium Interactive Map with Colored Points ---
    # Density at every original point, computed once and reused for both the
    # marker color and the tooltip.
    original_coordinates = np.vstack([original_longitudes, original_latitudes])
    density_at_original_points = kde_object(original_coordinates)

    # Normalize for coloring; the epsilon avoids division by zero when all
    # densities are equal.
    density_floor = density_at_original_points.min()
    density_span = density_at_original_points.max() - density_floor
    density_normalized = (density_at_original_points - density_floor) / (density_span + 1e-9)

    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # Matplotlib 3.9.
    colormap = plt.get_cmap('viridis')

    # Center the map on the mean of the points.
    map_center_lat = np.mean(original_latitudes)
    map_center_lon = np.mean(original_longitudes)
    m_colored_points = folium.Map(location=[map_center_lat, map_center_lon], zoom_start=10)

    # One circle marker per point, colored by its normalized density.
    for lat, lon, density, density_norm in zip(original_latitudes, original_longitudes,
                                               density_at_original_points, density_normalized):
        color = matplotlib.colors.rgb2hex(colormap(density_norm))

        folium.CircleMarker(
            location=[lat, lon],
            radius=5,  # Adjust marker size as needed
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            tooltip=f"Density: {density:.4f}"  # Raw (un-normalized) density
        ).add_to(m_colored_points)

    # Serialize the map to an HTML string for embedding.
    colored_points_map_html = m_colored_points._repr_html_()

    return pil_image, colored_points_map_html
319
+
320
+ # Organized version #5: Update the Gradio interface
321
+
322
+
323
+
324
+ # The plot_kde_and_points function is defined in a previous cell
325
+ # The load_data_and_calculate_kde function is defined in a previous cell
326
+ # The distribution_files dictionary is defined in a previous cell
327
+ # The pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max are defined in a previous cell
328
+
329
+ # Define the main function that will be called by Gradio
330
def update_visualization(distribution_name):
    """
    Gradio callback: builds both visualizations for the chosen distribution.

    Args:
        distribution_name (str): The name of the selected distribution.

    Returns:
        tuple: A tuple containing:
            - pil_image (PIL.Image): The static KDE heatmap image (None on error).
            - colored_points_map_html (str): The HTML for the interactive map with
              colored points (empty string on error).
            - error_message (str): An error message if data loading fails, otherwise empty string.
    """
    lats, lons, kde, load_error = load_data_and_calculate_kde(distribution_name)

    # Guard clause: on failure return placeholder outputs plus the message.
    if load_error:
        return None, "", load_error

    # The heatmap always covers the predefined Pittsburgh bounding box,
    # regardless of the extent of the loaded points.
    heatmap_image, points_map_html = plot_kde_and_points(
        pittsburgh_lat_min,
        pittsburgh_lat_max,
        pittsburgh_lon_min,
        pittsburgh_lon_max,
        lats,
        lons,
        kde,
    )

    return heatmap_image, points_map_html, ""
357
+
358
+
359
# Distribution names offered in the dropdown (keys of distribution_files).
distribution_choices = list(distribution_files.keys())

# Define the Gradio interface: one dropdown in, heatmap + map + error text out.
iface = gr.Interface(
    fn=update_visualization,
    inputs=[
        gr.Dropdown(choices=distribution_choices, label="Select Distribution", value=distribution_choices[0]),
    ],
    outputs=[
        gr.Image(label="Static Kernel Density Map (Matplotlib)"),
        gr.HTML(label="Interactive Points Map Colored by KDE (Folium)"),
        # Left visible: update_visualization reports failures only through
        # this output, so hiding it would leave the user with blank results
        # and no explanation.
        gr.Textbox(label="Error Message"),
    ],
    title="Kernel Density Estimation of Different Spatial Distributions around Pittsburgh",
    description="Select a spatial distribution from the dropdown to visualize its kernel density and point distribution around Pittsburgh."
)

# Launch the Gradio interface
iface.launch(share=True)
380
+
381
+ """Here is the content for your `requirements.txt` file:"""
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ numpy
2
+ pandas
3
+ matplotlib
4
+ pillow
5
+ scipy
6
+ folium
7
+ gradio