Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """Kernel_density_prototype.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1binQn5KdO6tLQHL8uV6s8pTSdXTiArl4 | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| import os # Import os for file path joining | |
| import matplotlib.pyplot as plt | |
| import matplotlib.cm as cm | |
| import numpy as np | |
| from PIL import Image | |
| import io | |
| import folium | |
| from folium.plugins import HeatMap | |
| from folium import Marker # Import Marker for plotting points | |
| import matplotlib.colors # Import for color conversion | |
| import pandas as pd # Import pandas for DataFrame if needed, though not strictly for this function | |
| import pandas as pd | |
| from scipy.stats import gaussian_kde | |
| import numpy as np | |
| import gradio as gr | |
# Organized version #1: Define Pittsburgh Coordinate Range
# Approximate bounding box of the Pittsburgh area; adjust the values as needed.
pittsburgh_lat_min = 40.3
pittsburgh_lat_max = 40.6
pittsburgh_lon_min = -80.2
pittsburgh_lon_max = -79.8

# Reference centre point, used as the mean of centred distributions (e.g. Normal).
pittsburgh_lat, pittsburgh_lon = 40.4406, -79.9959

print(f"Pittsburgh Latitude Range: {pittsburgh_lat_min} to {pittsburgh_lat_max}")
print(f"Pittsburgh Longitude Range: {pittsburgh_lon_min} to {pittsburgh_lon_max}")

# Organized version #2: Generate and save temporary CSV files
# Number of points requested from each synthetic distribution.
num_points = 500
# Function to generate uniformly distributed points
def generate_uniform_points(lat_min, lat_max, lon_min, lon_max, num_points):
    """Sample ``num_points`` coordinates uniformly from a lat/lon bounding box.

    Latitudes are drawn first, then longitudes, each with
    ``np.random.uniform`` over its own interval.

    Args:
        lat_min: Lower latitude bound.
        lat_max: Upper latitude bound.
        lon_min: Lower longitude bound.
        lon_max: Upper longitude bound.
        num_points: Number of coordinate pairs to draw.

    Returns:
        pandas.DataFrame with 'latitude' and 'longitude' columns.
    """
    sampled = {
        'latitude': np.random.uniform(lat_min, lat_max, size=num_points),
        'longitude': np.random.uniform(lon_min, lon_max, size=num_points),
    }
    return pd.DataFrame(sampled)
# Function to generate normally distributed points (clustered around a center)
def generate_normal_points(center_lat, center_lon, lat_std, lon_std, num_points,
                           lat_min=None, lat_max=None, lon_min=None, lon_max=None):
    """Sample points from independent normal distributions around a centre.

    Samples that fall outside the bounding box are discarded and are NOT
    resampled, so the result may contain fewer than ``num_points`` rows.

    Args:
        center_lat: Mean of the latitude distribution.
        center_lon: Mean of the longitude distribution.
        lat_std: Standard deviation of the latitude distribution.
        lon_std: Standard deviation of the longitude distribution.
        num_points: Number of candidate points to draw before filtering.
        lat_min, lat_max, lon_min, lon_max: Optional inclusive bounding box.
            Any bound left as ``None`` falls back to the corresponding
            module-level ``pittsburgh_*`` value, which is what the function
            used unconditionally before these parameters existed.

    Returns:
        pandas.DataFrame with 'latitude' and 'longitude' columns holding the
        in-bounds samples.
    """
    # Resolve defaults at call time so the globals are only needed when the
    # caller does not supply explicit bounds.
    if lat_min is None:
        lat_min = pittsburgh_lat_min
    if lat_max is None:
        lat_max = pittsburgh_lat_max
    if lon_min is None:
        lon_min = pittsburgh_lon_min
    if lon_max is None:
        lon_max = pittsburgh_lon_max

    lats = np.random.normal(center_lat, lat_std, num_points)
    lons = np.random.normal(center_lon, lon_std, num_points)

    # Keep only the samples inside the (inclusive) bounding box.
    inside = (
        (lats >= lat_min) & (lats <= lat_max)
        & (lons >= lon_min) & (lons <= lon_max)
    )
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})
# Function to generate bimodal points (two clusters)
def generate_bimodal_points(center1_lat, center1_lon, center2_lat, center2_lon,
                            lat_std, lon_std, num_points,
                            lat_min=None, lat_max=None, lon_min=None, lon_max=None):
    """Sample points from two normal clusters that share the same spread.

    The first cluster receives ``num_points // 2`` candidates and the second
    receives the remainder. Candidates outside the bounding box are discarded
    without resampling, so fewer than ``num_points`` rows may be returned.

    Args:
        center1_lat, center1_lon: Centre of the first cluster.
        center2_lat, center2_lon: Centre of the second cluster.
        lat_std: Latitude standard deviation used for both clusters.
        lon_std: Longitude standard deviation used for both clusters.
        num_points: Total number of candidate points across both clusters.
        lat_min, lat_max, lon_min, lon_max: Optional inclusive bounding box.
            Any bound left as ``None`` falls back to the corresponding
            module-level ``pittsburgh_*`` value, which is what the function
            used unconditionally before these parameters existed.

    Returns:
        pandas.DataFrame with 'latitude' and 'longitude' columns; rows from
        the first cluster precede rows from the second.
    """
    # Resolve defaults at call time so the globals are only needed when the
    # caller does not supply explicit bounds.
    if lat_min is None:
        lat_min = pittsburgh_lat_min
    if lat_max is None:
        lat_max = pittsburgh_lat_max
    if lon_min is None:
        lon_min = pittsburgh_lon_min
    if lon_max is None:
        lon_max = pittsburgh_lon_max

    first_count = num_points // 2
    second_count = num_points - first_count  # takes the odd point, if any

    lats = np.concatenate([
        np.random.normal(center1_lat, lat_std, first_count),
        np.random.normal(center2_lat, lat_std, second_count),
    ])
    lons = np.concatenate([
        np.random.normal(center1_lon, lon_std, first_count),
        np.random.normal(center2_lon, lon_std, second_count),
    ])

    # Keep only the samples inside the (inclusive) bounding box.
    inside = (
        (lats >= lat_min) & (lats <= lat_max)
        & (lons >= lon_min) & (lons <= lon_max)
    )
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})
# Function to generate points with a Poisson-like distribution (approximated)
# This is not a true spatial Poisson process: a "clumpy", non-uniform pattern
# is approximated by scattering several small normal clusters over the region.
def generate_poisson_like_points(lat_min, lat_max, lon_min, lon_max, num_points, num_clusters=10, cluster_std=0.01):
    """Sample a clumpy point pattern made of several small normal clusters.

    Cluster centres are drawn uniformly from the bounding box shrunk by
    ``cluster_std`` on every side. ``num_points`` candidates are split as
    evenly as possible across the clusters (the first
    ``num_points % num_clusters`` clusters get one extra point, so no
    requested point is dropped by integer division). Candidates outside the
    bounding box are discarded without resampling, so fewer than
    ``num_points`` rows may be returned.

    Args:
        lat_min, lat_max: Inclusive latitude bounds.
        lon_min, lon_max: Inclusive longitude bounds.
        num_points: Total number of candidate points to draw.
        num_clusters: Number of clusters; must be at least 1.
        cluster_std: Standard deviation of each cluster in both directions.

    Returns:
        pandas.DataFrame with 'latitude' and 'longitude' columns.

    Raises:
        ValueError: If ``num_clusters`` is less than 1 or ``num_points`` is
            negative.
    """
    if num_clusters < 1:
        raise ValueError(f"num_clusters must be at least 1, got {num_clusters}")
    if num_points < 0:
        raise ValueError(f"num_points must be non-negative, got {num_points}")

    # Random cluster centres, kept one standard deviation away from the edges.
    centers_lat = np.random.uniform(lat_min + cluster_std, lat_max - cluster_std, num_clusters)
    centers_lon = np.random.uniform(lon_min + cluster_std, lon_max - cluster_std, num_clusters)

    # Spread the points over the clusters without losing the remainder.
    base_count, leftover = divmod(num_points, num_clusters)
    counts = np.full(num_clusters, base_count)
    counts[:leftover] += 1

    # One vectorised draw per axis: each point uses its own cluster centre.
    lats = np.random.normal(np.repeat(centers_lat, counts), cluster_std)
    lons = np.random.normal(np.repeat(centers_lon, counts), cluster_std)

    # Keep only the samples inside the (inclusive) bounding box.
    inside = (
        (lats >= lat_min) & (lats <= lat_max)
        & (lons >= lon_min) & (lons <= lon_max)
    )
    return pd.DataFrame({'latitude': lats[inside], 'longitude': lons[inside]})
# Generate data for different distributions
uniform_df = generate_uniform_points(
    pittsburgh_lat_min, pittsburgh_lat_max,
    pittsburgh_lon_min, pittsburgh_lon_max,
    num_points,
)
# Single cluster centred on the Pittsburgh reference point.
normal_df = generate_normal_points(pittsburgh_lat, pittsburgh_lon, 0.05, 0.05, num_points)

# Two cluster centres, both inside the Pittsburgh bounding box.
bimodal_center1_lat, bimodal_center1_lon = 40.4, -80.1
bimodal_center2_lat, bimodal_center2_lon = 40.5, -79.9
bimodal_df = generate_bimodal_points(
    bimodal_center1_lat, bimodal_center1_lon,
    bimodal_center2_lat, bimodal_center2_lon,
    0.03, 0.03, num_points,
)
poisson_like_df = generate_poisson_like_points(
    pittsburgh_lat_min, pittsburgh_lat_max,
    pittsburgh_lon_min, pittsburgh_lon_max,
    num_points,
)

# Directory that receives the generated CSV files (created on demand).
csv_dir = "spatial_data"
os.makedirs(csv_dir, exist_ok=True)

# One CSV file per distribution.
uniform_csv_path = os.path.join(csv_dir, "uniform_coords.csv")
normal_csv_path = os.path.join(csv_dir, "normal_coords.csv")
bimodal_csv_path = os.path.join(csv_dir, "bimodal_coords.csv")
poisson_csv_path = os.path.join(csv_dir, "poisson_like_coords.csv")

# Display name -> CSV path; the keys populate the Gradio dropdown.
distribution_files = {
    "Uniform": uniform_csv_path,
    "Normal": normal_csv_path,
    "Bimodal": bimodal_csv_path,
    "Poisson-like": poisson_csv_path
}

# Write every dataframe to its CSV file (without the index column).
for _frame, _csv_path in (
    (uniform_df, uniform_csv_path),
    (normal_df, normal_csv_path),
    (bimodal_df, bimodal_csv_path),
    (poisson_like_df, poisson_csv_path),
):
    _frame.to_csv(_csv_path, index=False)

print(f"Saved uniform data to {uniform_csv_path}")
print(f"Saved normal data to {normal_csv_path}")
print(f"Saved bimodal data to {bimodal_csv_path}")
print(f"Saved poisson-like data to {poisson_csv_path}")
# Organized version #3: Create a function to load data and calculate KDE
def load_data_and_calculate_kde(distribution_name):
    """Load the coordinates of a named distribution and fit a 2-D Gaussian KDE.

    Failures are reported through the returned error message instead of being
    raised, so the caller can show them in the UI.

    Rows whose latitude or longitude is missing, non-numeric or non-finite
    are dropped before fitting, because they would otherwise corrupt the
    KDE's covariance estimate.

    Example:
        lats, lons, kde, error = load_data_and_calculate_kde("Uniform")

    Args:
        distribution_name (str): The name of the distribution (key in
            distribution_files).

    Returns:
        tuple: ``(latitudes, longitudes, kde_object, error_message)``.
            On success the first three are the latitude array, the longitude
            array and the fitted ``gaussian_kde`` (built on rows ordered as
            longitude, latitude), and ``error_message`` is None. On failure
            the first three are None and ``error_message`` is a string.
    """
    file_path = distribution_files.get(distribution_name)
    if file_path is None:
        return None, None, None, f"Error: Unknown distribution name '{distribution_name}'"

    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        return None, None, None, f"Error: File not found at '{file_path}'"
    except Exception as e:
        # Deliberately broad: any read/parse failure becomes a UI message.
        return None, None, None, f"Error loading data or calculating KDE: {e}"

    if 'latitude' not in df.columns or 'longitude' not in df.columns:
        return None, None, None, f"Error: CSV file '{file_path}' must contain 'latitude' and 'longitude' columns."

    # Coerce to numbers and discard rows that are not usable coordinates.
    coords = df[['latitude', 'longitude']].apply(pd.to_numeric, errors='coerce')
    coords = coords[np.isfinite(coords).all(axis=1)]

    # A 2-D KDE needs more points than dimensions for a usable covariance.
    if len(coords) < 3:
        return None, None, None, (
            f"Error: CSV file '{file_path}' needs at least 3 valid coordinate rows "
            f"to calculate a KDE (found {len(coords)})."
        )

    latitudes = coords['latitude'].values
    longitudes = coords['longitude'].values

    try:
        # gaussian_kde expects shape (n_dims, n_points): row 0 = lon, row 1 = lat.
        kde_object = gaussian_kde(np.vstack([longitudes, latitudes]))
    except Exception as e:
        # Degenerate data (e.g. all points collinear) fails inside SciPy.
        return None, None, None, f"Error loading data or calculating KDE: {e}"

    return latitudes, longitudes, kde_object, None
# Organized version #4: Create a function to visualize KDE and points
def _render_kde_heatmap(min_lat, max_lat, min_lon, max_lon, kde_object):
    """Render the KDE over a 100x100 lon/lat grid and return it as a PIL image."""
    # Grid axes follow the KDE's coordinate order: x = longitude, y = latitude.
    x, y = np.mgrid[min_lon:max_lon:100j, min_lat:max_lat:100j]
    z = kde_object(np.vstack([x.ravel(), y.ravel()])).reshape(x.shape)

    # Scale to [0, 1]; a perfectly flat density maps to all zeros.
    z_normalized = (z - z.min()) / (z.max() - z.min()) if z.max() > z.min() else np.zeros_like(z)

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        # Transposed so that rows run along latitude and columns along longitude.
        im = ax.imshow(z_normalized.T, origin='lower',
                       extent=[min_lon, max_lon, min_lat, max_lat],
                       cmap='hot', aspect='auto')
        fig.colorbar(im, ax=ax, label='Density')
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        ax.set_title('Kernel Density Estimate Heatmap (Static)')

        # Save this specific figure (not pyplot's "current" one) to memory.
        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering failed.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)


def _build_density_points_map(latitudes, longitudes, kde_object):
    """Build a Folium map of the points coloured by KDE density; return its HTML."""
    densities = kde_object(np.vstack([longitudes, latitudes]))

    # The small epsilon avoids division by zero when all densities are equal.
    density_normalized = (densities - densities.min()) / (densities.max() - densities.min() + 1e-9)

    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # Matplotlib 3.9.
    colormap = plt.get_cmap('viridis')

    # Centre the map on the mean of the points.
    points_map = folium.Map(location=[np.mean(latitudes), np.mean(longitudes)], zoom_start=10)

    for lat, lon, density, density_norm in zip(latitudes, longitudes, densities, density_normalized):
        color = matplotlib.colors.rgb2hex(colormap(density_norm))
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            # Reuse the density computed above instead of re-evaluating the KDE.
            tooltip=f"Density: {density:.4f}"
        ).add_to(points_map)

    return points_map._repr_html_()


def plot_kde_and_points(min_lat, max_lat, min_lon, max_lon, original_latitudes, original_longitudes, kde_object):
    """
    Generates a static KDE heatmap and an interactive Folium map with points colored by KDE density.

    Args:
        min_lat (float): Minimum latitude for the static heatmap.
        max_lat (float): Maximum latitude for the static heatmap.
        min_lon (float): Minimum longitude for the static heatmap.
        max_lon (float): Maximum longitude for the static heatmap.
        original_latitudes (np.ndarray): Array of original latitudes.
        original_longitudes (np.ndarray): Array of original longitudes.
        kde_object (gaussian_kde): The calculated kernel density estimate object;
            it is evaluated on inputs ordered as (longitude, latitude).

    Returns:
        tuple: A tuple containing:
            - pil_image (PIL.Image): The static KDE heatmap image.
            - colored_points_map_html (str): The HTML for the interactive map with colored points.
    """
    pil_image = _render_kde_heatmap(min_lat, max_lat, min_lon, max_lon, kde_object)
    colored_points_map_html = _build_density_points_map(
        original_latitudes, original_longitudes, kde_object
    )
    return pil_image, colored_points_map_html
# Organized version #5: Update the Gradio interface
# Relies on plot_kde_and_points, load_data_and_calculate_kde, distribution_files
# and the pittsburgh_* bounds defined earlier in this file.
def update_visualization(distribution_name):
    """
    Gradio callback: load the chosen distribution, fit its KDE and render both views.

    Args:
        distribution_name (str): The name of the selected distribution.

    Returns:
        tuple: A tuple containing:
            - pil_image (PIL.Image): The static KDE heatmap image (None on error).
            - colored_points_map_html (str): The HTML for the interactive map
              with colored points (empty string on error).
            - error_message (str): The loader's error message if loading
              fails, otherwise an empty string.
    """
    lats, lons, kde, load_error = load_data_and_calculate_kde(distribution_name)

    # Loading failed: return placeholder outputs plus the message.
    if load_error:
        return None, "", load_error

    # The heatmap always covers the predefined Pittsburgh bounding box.
    bounds = (pittsburgh_lat_min, pittsburgh_lat_max, pittsburgh_lon_min, pittsburgh_lon_max)
    heatmap_image, points_map_html = plot_kde_and_points(*bounds, lats, lons, kde)

    return heatmap_image, points_map_html, ""
# Get the list of distribution names for the dropdown
distribution_choices = list(distribution_files.keys())

# Define the Gradio interface: one dropdown in, heatmap + map + error text out.
iface = gr.Interface(
    fn=update_visualization,
    inputs=[
        # The first distribution is preselected.
        gr.Dropdown(choices=distribution_choices, label="Select Distribution", value=distribution_choices[0]),
    ],
    outputs=[
        gr.Image(label="Static Kernel Density Map (Matplotlib)"),
        gr.HTML(label="Interactive Points Map Colored by KDE (Folium)"),
        # Left visible so that error messages returned by update_visualization
        # actually reach the user; it stays empty on success.
        gr.Textbox(label="Error Message")
    ],
    title="Kernel Density Estimation of Different Spatial Distributions around Pittsburgh",
    description="Select a spatial distribution from the dropdown to visualize its kernel density and point distribution around Pittsburgh."
)

# Launch the Gradio interface (share=True requests a public share link).
iface.launch(share=True)
| """Here is the content for your `requirements.txt` file:""" |