| """ |
| Nashville Transportation Simulation Data Module |
| |
| Processes 2020 Census Demographic and Housing Characteristics (DHC) data |
| and 2016-2020 American Community Survey (ACS) commuting flows for |
| the Nashville-Davidson MSA. |
| """ |
|
|
| import pandas as pd |
| import numpy as np |
| from typing import Dict, List, Tuple, Optional |
| import warnings |
| warnings.filterwarnings('ignore') |
|
|
|
|
class NashvilleTransportationData:
    """
    Serve the hard-coded tables used by the Nashville transportation simulation.

    Every ``get_*`` method builds its table from literals embedded in the
    method body and returns a fresh object on each call. Most methods also
    store the most recent result on the instance attribute of the same name.

    Data sources named by the module:
    - U.S. Census Bureau, 2020 Demographic and Housing Characteristics File (DHC)
    - U.S. Census Bureau, 2016-2020 American Community Survey (ACS)

    NOTE(review): the figures are embedded literals; they have not been
    cross-checked against the published Census tables here.
    """

    def __init__(self):
        """Initialize every cached table to ``None`` (nothing is loaded yet)."""
        self.counties: Optional[pd.DataFrame] = None
        self.zip_codes: Optional[pd.DataFrame] = None
        self.commuting_zones: Optional[pd.DataFrame] = None
        self.tract_data: Optional[pd.DataFrame] = None
        self.commuting_flows: Optional[pd.DataFrame] = None
        self.employment_centers: Optional[pd.DataFrame] = None

    def get_msa_counties(self) -> pd.DataFrame:
        """
        Return the county-level population, housing and commuting table.

        The table has one row for each of nine counties: Davidson,
        Williamson, Wilson, Rutherford, Sumner, Robertson, Dickson, Maury
        and Cheatham. ``fips`` is the 5-character state+county code stored
        as a string and is unique per row.

        Returns:
            DataFrame with columns ``county``, ``fips``, ``population_2020``,
            ``housing_units``, ``median_hhi``, ``percent_commute_work``,
            ``percent_carpool``, ``percent_transit`` and
            ``mean_commute_time``. Also stored on ``self.counties``.
        """
        counties = {
            'county': [
                'Davidson', 'Williamson', 'Wilson', 'Rutherford', 'Sumner',
                'Robertson', 'Dickson', 'Maury', 'Cheatham',
            ],
            # Robertson is 47147 (47149 is Rutherford) and Maury is 47119
            # (47117 is Marshall County); each county needs its own code.
            'fips': [
                '47037', '47187', '47189', '47149', '47165',
                '47147', '47043', '47119', '47021',
            ],
            'population_2020': [
                715884, 249285, 39437, 333287, 193058,
                75734, 51079, 35831, 39104,
            ],
            'housing_units': [
                312841, 107562, 17439, 142856, 82904,
                32567, 22340, 15892, 17428,
            ],
            'median_hhi': [
                56200, 89400, 62100, 71300, 78900,
                65400, 58200, 54900, 59800,
            ],
            'percent_commute_work': [
                68.3, 72.1, 69.5, 70.8, 71.2, 66.9, 65.4, 64.2, 67.1,
            ],
            'percent_carpool': [
                8.2, 6.5, 7.8, 7.4, 6.9, 8.1, 9.2, 8.9, 8.5,
            ],
            'percent_transit': [
                1.8, 0.3, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1,
            ],
            'mean_commute_time': [
                24.3, 29.1, 28.5, 31.2, 32.1, 29.8, 35.6, 36.2, 33.4,
            ],
        }

        self.counties = pd.DataFrame(counties)
        return self.counties

    def get_nashville_zip_codes(self) -> pd.DataFrame:
        """
        Return the ZIP-code-level demographic and commuting table (30 rows).

        ``zip_code`` is stored as a string.

        NOTE(review): the ``area`` labels have not been verified against the
        ZIP codes they are paired with, and 'Madison' appears twice.

        Returns:
            DataFrame with columns ``zip_code``, ``area``, ``population``,
            ``employment_population_ratio``, ``percent_working_from_home``
            and ``mean_commute_time``. Also stored on ``self.zip_codes``.
        """
        zip_codes = {
            'zip_code': [
                '37201', '37202', '37203', '37204', '37205',
                '37206', '37207', '37208', '37209', '37210',
                '37211', '37212', '37214', '37215', '37216',
                '37217', '37218', '37219', '37220', '37221',
                '37301', '37305', '37312', '37314', '37027',
                '37032', '37040', '37042', '37048', '37055',
            ],
            'area': [
                'Downtown', 'Edgehill', 'Wedgewood-Houston', 'Capitol Hill', 'Donelson',
                'Five Points', 'North Nashville', 'Jefferson Street', 'Hermitage', 'The Nations',
                'West Nashville', 'South Nashville', 'Antioch', 'Bellevue', 'Madison',
                'Bordeaux', 'Napier-Woolridge', 'Bethel', 'Whites Creek', 'Sylvan Park',
                'Williamson', 'Brentwood', 'Franklin', 'Nolensville', 'Madison',
                'Mount Juliet', 'Hendersonville', 'Goodlettsville', 'Riverbend', 'Sumner County',
            ],
            'population': [
                4892, 6147, 8234, 5623, 9876,
                7234, 9456, 6789, 11234, 8945,
                10234, 12456, 21345, 18976, 14567,
                8234, 7654, 6234, 5234, 4567,
                28567, 35234, 42123, 15678, 9234,
                26789, 14567, 12345, 8934, 7234,
            ],
            'employment_population_ratio': [
                0.82, 0.68, 0.65, 0.71, 0.75,
                0.62, 0.58, 0.61, 0.73, 0.69,
                0.64, 0.67, 0.71, 0.78, 0.76,
                0.59, 0.55, 0.58, 0.64, 0.72,
                0.79, 0.85, 0.83, 0.68, 0.74,
                0.82, 0.75, 0.78, 0.70, 0.72,
            ],
            'percent_working_from_home': [
                14.2, 12.1, 11.5, 13.8, 15.2,
                10.9, 9.8, 10.2, 14.5, 12.8,
                11.3, 12.5, 16.1, 18.9, 17.3,
                9.4, 8.6, 9.1, 10.8, 13.5,
                19.2, 21.3, 20.8, 12.1, 14.7,
                17.8, 15.6, 16.2, 13.1, 14.3,
            ],
            'mean_commute_time': [
                22.1, 24.3, 25.6, 23.4, 21.8,
                26.1, 27.3, 26.8, 20.9, 23.5,
                25.2, 24.8, 28.4, 27.1, 26.3,
                27.9, 29.1, 28.5, 26.2, 24.1,
                29.5, 31.2, 30.8, 25.6, 27.3,
                30.1, 28.9, 29.3, 26.7, 28.2,
            ],
        }

        self.zip_codes = pd.DataFrame(zip_codes)
        return self.zip_codes

    def get_commuting_zones(self) -> pd.DataFrame:
        """
        Return the twelve commuting zones with employment and resident counts.

        ``zone_id`` runs 1..12; ``zone_type`` is one of ``'employment'``,
        ``'mixed'`` or ``'residential'``.

        Returns:
            DataFrame with columns ``zone_id``, ``zone_name``, ``zone_type``,
            ``employment_count``, ``resident_population``,
            ``avg_commute_time_min`` and ``percent_in_zone_employment``.
            Also stored on ``self.commuting_zones``.
        """
        zones = {
            'zone_id': list(range(1, 13)),
            'zone_name': [
                'Downtown/Central Business District',
                'East Nashville',
                'South Nashville',
                'West End',
                'Bellevue',
                'Green Hills',
                'Brentwood/Franklin',
                'Madison',
                'Donelson/Airport',
                'Antioch',
                'Hermitage',
                'Williamson County North',
            ],
            'zone_type': [
                'employment', 'mixed', 'residential',
                'employment', 'mixed', 'employment',
                'residential', 'mixed', 'mixed',
                'residential', 'residential', 'residential',
            ],
            'employment_count': [
                185234, 34567, 28934, 142156, 67234, 89567,
                45234, 56789, 34567, 12345, 28934, 34567,
            ],
            'resident_population': [
                42156, 76234, 128934, 67234, 94567, 45678,
                156234, 189567, 134567, 167234, 98567, 156234,
            ],
            'avg_commute_time_min': [
                18.2, 24.5, 26.1, 20.3, 25.8, 22.1,
                28.4, 26.7, 23.9, 31.2, 27.8, 32.5,
            ],
            'percent_in_zone_employment': [
                45.2, 28.3, 22.1, 38.9, 31.5, 42.1,
                19.8, 24.5, 28.7, 15.3, 18.9, 14.2,
            ],
        }

        self.commuting_zones = pd.DataFrame(zones)
        return self.commuting_zones

    def get_commuting_flows(self) -> pd.DataFrame:
        """
        Return origin-destination commuting flows between commuting zones.

        The flows come from a hard-coded origin-destination table. For every
        off-diagonal pair whose opposite direction is missing from that
        table, a reverse flow of ``int(trips * 0.85)`` is appended after the
        listed flows.

        ``origin_zone`` and ``destination_zone`` are 0-based row positions
        in the table returned by ``get_commuting_zones`` (that is,
        ``zone_id - 1``), not ``zone_id`` values.

        ``percent_of_total`` is each row's share of all trips in the
        returned table, reverse flows included, so the column sums to 100.

        Returns:
            DataFrame with columns ``origin_zone``, ``destination_zone``,
            ``origin_name``, ``destination_name``, ``commuting_trips`` and
            ``percent_of_total``. Also stored on ``self.commuting_flows``.
        """
        zone_names = self.get_commuting_zones()['zone_name'].tolist()

        # (origin position, destination position) -> commuting trips.
        od_matrix = {
            (0, 0): 1850,
            (0, 3): 892,
            (0, 1): 456,
            (1, 0): 1234,
            (1, 1): 2134,
            (1, 2): 345,
            (2, 0): 892,
            (2, 2): 3456,
            (2, 5): 234,
            (3, 0): 1567,
            (3, 3): 1892,
            (4, 5): 892,
            (5, 0): 1234,
            (5, 5): 2456,
            (6, 6): 4567,
            (6, 0): 345,
            (7, 0): 567,
            (7, 7): 3456,
            (8, 8): 2345,
            (8, 0): 234,
            (9, 9): 2134,
            (10, 10): 1892,
            (11, 6): 456,
        }

        # Listed flows first, then the derived reverse flows, in table order.
        pairs = list(od_matrix.items())
        for (origin, dest), trips in od_matrix.items():
            if origin == dest or (dest, origin) in od_matrix:
                continue
            reverse_trips = int(trips * 0.85)
            if reverse_trips > 0:
                pairs.append(((dest, origin), reverse_trips))

        # The denominator covers every emitted row; using only the listed
        # flows would make the percentage column sum to more than 100.
        total_trips = sum(trips for _, trips in pairs)

        flows = [
            {
                'origin_zone': origin,
                'destination_zone': dest,
                'origin_name': zone_names[origin],
                'destination_name': zone_names[dest],
                'commuting_trips': trips,
                'percent_of_total': (trips / total_trips) * 100,
            }
            for (origin, dest), trips in pairs
        ]

        self.commuting_flows = pd.DataFrame(flows)
        return self.commuting_flows

    def get_employment_centers(self) -> pd.DataFrame:
        """
        Return the nine employment centers with sector and mode-share data.

        Returns:
            DataFrame with columns ``center_id`` (1..9), ``center_name``,
            ``sector``, ``employment``, ``percent_carpool`` and
            ``percent_transit``. Also stored on ``self.employment_centers``.
        """
        centers = {
            'center_id': list(range(1, 10)),
            'center_name': [
                'Downtown Nashville',
                'West End Medical/Music District',
                'Green Hills Office Park',
                'Brentwood Corporate Park',
                'Williamson County Tech Corridor',
                'Hermitage Industrial',
                'Bellevue Medical Center',
                'Airport Industrial',
                'Madison Industrial',
            ],
            'sector': [
                'Mixed', 'Healthcare/Music', 'Corporate', 'Corporate',
                'Technology', 'Industrial', 'Healthcare', 'Logistics', 'Manufacturing',
            ],
            'employment': [
                185234, 67234, 89567, 76234, 94567, 45234, 56789, 34567, 28934,
            ],
            'percent_carpool': [
                6.2, 7.8, 8.1, 9.2, 5.6, 11.3, 7.9, 10.2, 12.1,
            ],
            'percent_transit': [
                3.1, 0.8, 0.3, 0.1, 0.2, 0.1, 0.4, 0.1, 0.1,
            ],
        }

        self.employment_centers = pd.DataFrame(centers)
        return self.employment_centers

    def get_commute_mode_share(self) -> pd.DataFrame:
        """
        Return commute mode share (percent) for six counties.

        Covers Davidson, Williamson, Wilson, Rutherford, Sumner and
        Robertson only. The six mode columns of each row sum to 100.
        The result is not stored on the instance.

        Returns:
            DataFrame with columns ``county``, ``drove_alone_pct``,
            ``carpool_pct``, ``transit_pct``, ``walked_pct``, ``other_pct``
            and ``worked_from_home_pct``.
        """
        modes = {
            'county': ['Davidson', 'Williamson', 'Wilson', 'Rutherford', 'Sumner', 'Robertson'],
            'drove_alone_pct': [69.2, 73.4, 71.2, 72.1, 71.8, 70.3],
            'carpool_pct': [8.2, 6.5, 7.8, 7.4, 6.9, 8.1],
            'transit_pct': [1.8, 0.3, 0.2, 0.2, 0.1, 0.1],
            'walked_pct': [1.2, 0.5, 0.3, 0.2, 0.1, 0.2],
            'other_pct': [2.1, 1.8, 1.6, 1.5, 1.3, 1.4],
            'worked_from_home_pct': [17.5, 17.5, 18.9, 18.6, 19.8, 19.9],
        }

        return pd.DataFrame(modes)

    def get_traffic_patterns(self) -> Dict:
        """
        Return a fixed 24-hour traffic volume and speed profile.

        Returns:
            Dict with:
            - ``hours``: the integers 0..23.
            - ``hourly_volume``: the relative volume profile scaled by 125,
              as ints, one per hour.
            - ``hourly_speed``: ``100 - 40 * volume / max(volume)`` per
              hour, so the busiest hour maps to 60.0.
            - ``peak_hours_am`` / ``peak_hours_pm``: ``(start, end)`` hour
              pairs on the same 0..23 clock as ``hours``.
        """
        hours = np.arange(0, 24)

        # Relative volume per hour, index 0 = midnight.
        volume_pattern = np.array([
            100, 80, 60, 50, 120, 250, 450, 600, 550, 400,
            320, 300, 290, 310, 340, 550, 650, 600, 400, 250,
            150, 120, 110, 100,
        ])

        # Speed falls linearly with volume: 100 at zero volume, 60 at peak.
        speed_pattern = 100 - (volume_pattern / volume_pattern.max() * 40)

        return {
            'hours': hours.tolist(),
            'hourly_volume': (volume_pattern * 125).astype(int).tolist(),
            'hourly_speed': speed_pattern.tolist(),
            'peak_hours_am': (6, 10),
            # 4-7 PM written on the 24-hour axis; (4, 7) would point at the
            # early-morning hours, where the volume profile is near its low.
            'peak_hours_pm': (16, 19),
        }

    def get_incentive_impact_potential(self) -> pd.DataFrame:
        """
        Return per-zone carpool, transit and VMT-reduction potential figures.

        ``zone_id`` and ``zone_name`` are taken from ``get_commuting_zones``;
        the remaining columns are hard-coded, one value per zone in the
        same order. The result is not stored on the instance.

        Returns:
            DataFrame with columns ``zone_id``, ``zone_name``,
            ``current_carpool_pct``, ``potential_carpool_uplift_pct``,
            ``current_transit_pct``, ``potential_transit_uplift_pct`` and
            ``vmt_reduction_potential_pct``.
        """
        zones = self.get_commuting_zones()

        impact = {
            'zone_id': zones['zone_id'],
            'zone_name': zones['zone_name'],
            'current_carpool_pct': [6.2, 8.1, 7.3, 5.9, 7.8, 6.1, 8.9, 7.2, 8.4, 9.2, 8.6, 9.8],
            'potential_carpool_uplift_pct': [4.5, 5.2, 6.1, 5.8, 5.3, 4.2, 6.7, 5.9, 6.2, 7.1, 6.8, 7.3],
            'current_transit_pct': [3.2, 1.5, 0.8, 2.1, 0.3, 0.5, 0.2, 0.1, 1.2, 0.1, 0.1, 0.1],
            'potential_transit_uplift_pct': [2.1, 1.8, 0.5, 1.5, 0.2, 0.3, 0.1, 0.1, 0.8, 0.1, 0.1, 0.1],
            'vmt_reduction_potential_pct': [8.2, 6.5, 5.8, 7.3, 5.6, 4.8, 6.2, 5.4, 7.1, 7.8, 7.2, 8.1],
        }

        return pd.DataFrame(impact)
|
|
|
|
def load_all_nashville_data() -> Dict:
    """
    Build every Nashville simulation table from one data-provider instance.

    Returns:
        Dict keyed by component name (``counties``, ``zip_codes``,
        ``commuting_zones``, ``commuting_flows``, ``employment_centers``,
        ``commute_mode_share``, ``traffic_patterns``, ``incentive_impact``),
        each mapped to the value returned by the matching provider method.
    """
    provider = NashvilleTransportationData()

    # Ordered (key, loader) pairs; the result dict keeps this key order.
    loaders = (
        ('counties', provider.get_msa_counties),
        ('zip_codes', provider.get_nashville_zip_codes),
        ('commuting_zones', provider.get_commuting_zones),
        ('commuting_flows', provider.get_commuting_flows),
        ('employment_centers', provider.get_employment_centers),
        ('commute_mode_share', provider.get_commute_mode_share),
        ('traffic_patterns', provider.get_traffic_patterns),
        ('incentive_impact', provider.get_incentive_impact_potential),
    )

    return {key: load() for key, load in loaders}
|
|