| | |
| | """ |
Extract additional feature layers from existing Overture Maps places data:
- Healthcare: hospitals, clinics, pharmacies
- Government offices (TODO: no extractor implemented in this script yet)
- Tourist attractions
- Restaurants and accommodation (hotels)
| | """ |
| |
|
| | import geopandas as gpd |
| | from pathlib import Path |
| | import logging |
| |
|
# Emit INFO-level progress messages when the extractors run.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory layout, relative to this file: <grandparent of this file>/data
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
OVERTURE_DIR = DATA_DIR.joinpath("overture")  # input: places.geojson is read from here
OUTPUT_DIR = DATA_DIR.joinpath("enriched")  # output: filtered GeoJSON files are written here
| |
|
def extract_healthcare():
    """Write the healthcare-related Overture places to a GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps the rows whose
    ``category`` value contains any healthcare keyword (case-insensitive
    substring match; rows with a missing category are excluded). The result
    is saved as ``healthcare_facilities.geojson`` in ``OUTPUT_DIR``, which is
    created if it does not exist.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting healthcare facilities...")

    places = gpd.read_file(OVERTURE_DIR / "places.geojson")

    keywords = ['hospital', 'clinic', 'pharmacy', 'doctor', 'dentist', 'health']
    pattern = '|'.join(keywords)  # regex alternation: match any keyword
    is_healthcare = places['category'].str.contains(pattern, case=False, na=False)
    selected = places[is_healthcare]
    feature_count = len(selected)

    logger.info(f"Found {feature_count} healthcare facilities")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / "healthcare_facilities.geojson"
    selected.to_file(destination, driver='GeoJSON')

    return destination, feature_count
| |
|
def extract_tourism():
    """Write the tourism-related Overture places to a GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps the rows whose
    ``category`` value contains any tourism keyword (case-insensitive
    substring match; rows with a missing category are excluded). The result
    is saved as ``tourist_attractions.geojson`` in ``OUTPUT_DIR``.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting tourist attractions...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # NOTE(review): this is a substring match, so 'park' would also match a
    # category such as 'parking' -- confirm that is intended.
    tourism_categories = ['museum', 'monument', 'attraction', 'park', 'beach', 'viewpoint', 'zoo', 'aquarium']
    tourism_gdf = gdf[gdf['category'].str.contains('|'.join(tourism_categories), case=False, na=False)]

    logger.info(f"Found {len(tourism_gdf)} tourist attractions")

    # Create the output directory here as well, so this extractor does not
    # depend on extract_healthcare() having run first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "tourist_attractions.geojson"
    tourism_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(tourism_gdf)
| |
|
def extract_accommodation():
    """Write the accommodation-related Overture places to a GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps the rows whose
    ``category`` value contains any accommodation keyword (case-insensitive
    substring match; rows with a missing category are excluded). The result
    is saved as ``accommodation.geojson`` in ``OUTPUT_DIR``.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting accommodation...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    accommodation_categories = ['hotel', 'hostel', 'motel', 'resort', 'lodge', 'guest_house']
    accommodation_gdf = gdf[gdf['category'].str.contains('|'.join(accommodation_categories), case=False, na=False)]

    logger.info(f"Found {len(accommodation_gdf)} accommodation facilities")

    # Create the output directory here as well, so this extractor does not
    # depend on extract_healthcare() having run first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "accommodation.geojson"
    accommodation_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(accommodation_gdf)
| |
|
def extract_restaurants():
    """Write the food-service Overture places to a GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps the rows whose
    ``category`` value contains any food-service keyword (case-insensitive
    substring match; rows with a missing category are excluded). The result
    is saved as ``restaurants.geojson`` in ``OUTPUT_DIR``.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting restaurants...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # NOTE(review): this is a substring match, so 'bar' would also match a
    # category such as 'barber' -- confirm that is intended.
    restaurant_categories = ['restaurant', 'cafe', 'bar', 'fast_food', 'food_court']
    restaurant_gdf = gdf[gdf['category'].str.contains('|'.join(restaurant_categories), case=False, na=False)]

    logger.info(f"Found {len(restaurant_gdf)} restaurants/cafes")

    # Create the output directory here as well, so this extractor does not
    # depend on extract_healthcare() having run first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "restaurants.geojson"
    restaurant_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(restaurant_gdf)
| |
|
def main():
    """Run every extractor and log a per-dataset feature summary.

    Each extractor runs independently: a failure is logged together with its
    traceback and the remaining extractors still run.

    Returns:
        A list of ``{"dataset": <name>, "count": <feature count>}`` dicts, one
        per extractor that completed successfully, in execution order.
    """
    logger.info("=== Extracting features from Overture data ===")

    # (label used in the failure message, dataset name in the summary, extractor)
    extractors = (
        ("healthcare", "healthcare_facilities", extract_healthcare),
        ("tourism", "tourist_attractions", extract_tourism),
        ("accommodation", "accommodation", extract_accommodation),
        ("restaurant", "restaurants", extract_restaurants),
    )

    results = []
    for label, dataset, extractor in extractors:
        try:
            _, count = extractor()
        except Exception as e:
            # Best-effort: one failed dataset must not stop the others.
            # logger.exception keeps the traceback for debugging.
            logger.exception("Failed %s extraction: %s", label, e)
        else:
            results.append({"dataset": dataset, "count": count})

    logger.info("\n=== Extraction Summary ===")
    for result in results:
        logger.info(f" {result['dataset']}: {result['count']} features")

    return results
| |
|
# Run the full extraction only when executed as a script, not on import.
if __name__ == "__main__":
    main()
| |
|