Spaces:
Sleeping
Sleeping
| import requests | |
| import pandas as pd | |
| import re | |
| import math | |
| from typing import Tuple, List, Dict | |
def fetch_osm_data(lat: float, lon: float, radius: int, timeout: float = 200.0) -> List[Dict]:
    """Fetch every named OSM node, way and relation around a point.

    Sends a single Overpass QL query to the public Overpass interpreter and
    returns the raw ``elements`` list from its JSON response.

    Args:
        lat: Latitude of the search centre, in decimal degrees.
        lon: Longitude of the search centre, in decimal degrees.
        radius: Search radius passed to Overpass' ``around`` filter.
        timeout: Seconds ``requests`` waits on the connection before giving
            up. Without it a stalled server blocks the caller forever.
            NOTE(review): the default is meant to sit above Overpass'
            server-side query time limit -- confirm against the server used.

    Returns:
        The list stored under the ``elements`` key of the response.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-success HTTP status.
        KeyError: If the response JSON has no ``elements`` key.
    """
    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
    node["name"](around:{radius},{lat},{lon});
    way["name"](around:{radius},{lat},{lon});
    relation["name"](around:{radius},{lat},{lon});
    );
    out center;
    """
    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        timeout=timeout,
    )
    # Surface rate-limit / server errors as an HTTP error instead of an
    # opaque JSON decoding failure on the error page.
    response.raise_for_status()
    data = response.json()
    return data['elements']
# Tag values recognised for each category; membership is tested with ``in``.
_RESIDENTIAL_BUILDINGS = frozenset(
    {'residential', 'house', 'apartments', 'detached', 'terrace', 'dormitory', 'bungalow'}
)
_COMMERCIAL_BUILDINGS = frozenset({'commercial', 'office', 'retail', 'supermarket', 'kiosk'})
_INDUSTRIAL_BUILDINGS = frozenset({'industrial', 'warehouse', 'factory', 'manufacture'})
_EDUCATION_AMENITIES = frozenset(
    {'school', 'university', 'college', 'library', 'kindergarten', 'language_school'}
)
_HEALTHCARE_AMENITIES = frozenset(
    {'hospital', 'clinic', 'doctors', 'dentist', 'pharmacy', 'veterinary'}
)
_FOOD_AMENITIES = frozenset({'restaurant', 'cafe', 'bar', 'fast_food', 'pub', 'food_court'})
_ENTERTAINMENT_AMENITIES = frozenset(
    {'theatre', 'cinema', 'nightclub', 'arts_centre', 'community_centre'}
)
_TRANSPORT_AMENITIES = frozenset({'parking', 'bicycle_parking', 'bus_station', 'ferry_terminal'})
_RELIGIOUS_AMENITIES = frozenset({'place_of_worship', 'monastery'})
_PUBLIC_SERVICE_AMENITIES = frozenset(
    {'townhall', 'courthouse', 'police', 'fire_station', 'post_office'}
)
_PARK_LEISURE = frozenset({'park', 'playground', 'sports_centre', 'stadium', 'garden'})


def determine_location_type(tags: Dict[str, str]) -> str:
    """Map an OSM element's tags to a coarse location category.

    The checks run in a fixed priority order and the first match wins, so an
    element carrying several tags is classified by the earliest rule it
    satisfies (e.g. a residential building that also has a ``shop`` tag is
    'Residential').

    Args:
        tags: The element's OSM tags (key -> value).

    Returns:
        One of the category labels ('Residential', 'Commercial', ...,
        'Natural'), a land-use label ('Retail' or ``'Landuse: <Value>'``),
        or 'Other' when no rule matches.
    """
    building = tags.get('building')
    amenity = tags.get('amenity')

    # Residential
    if building in _RESIDENTIAL_BUILDINGS:
        return 'Residential'

    # Commercial
    if any(key in tags for key in ('shop', 'office', 'craft')):
        return 'Commercial'
    if building in _COMMERCIAL_BUILDINGS:
        return 'Commercial'

    # Industrial
    if building in _INDUSTRIAL_BUILDINGS:
        return 'Industrial'
    if 'industrial' in tags or 'industry' in tags:
        return 'Industrial'

    # Educational
    if amenity in _EDUCATION_AMENITIES:
        return 'Educational'

    # Healthcare
    if amenity in _HEALTHCARE_AMENITIES:
        return 'Healthcare'

    # Food & Drink
    if amenity in _FOOD_AMENITIES:
        return 'Food & Drink'

    # Parks & Recreation
    # Must be tested before the generic leisure/tourism rule below: that rule
    # matches every element with a ``leisure`` key, which would make this
    # more specific category unreachable.
    if tags.get('leisure') in _PARK_LEISURE:
        return 'Parks & Recreation'

    # Leisure & Entertainment
    if 'leisure' in tags or 'tourism' in tags:
        return 'Leisure & Entertainment'
    if amenity in _ENTERTAINMENT_AMENITIES:
        return 'Leisure & Entertainment'

    # Transportation
    if amenity in _TRANSPORT_AMENITIES:
        return 'Transportation'
    if 'highway' in tags or 'railway' in tags or 'aeroway' in tags:
        return 'Transportation'

    # Religious
    if amenity in _RELIGIOUS_AMENITIES:
        return 'Religious'

    # Government & Public Services
    if amenity in _PUBLIC_SERVICE_AMENITIES:
        return 'Government & Public Services'

    # Natural
    if 'natural' in tags:
        return 'Natural'

    # Landuse
    if 'landuse' in tags:
        landuse = tags['landuse'].capitalize()
        if landuse in ('Residential', 'Commercial', 'Industrial', 'Retail'):
            return landuse
        return f'Landuse: {landuse}'

    # If no specific category is found, return 'Other'
    return 'Other'
def parse_osm_data(elements: List[Dict]) -> pd.DataFrame:
    """Turn raw Overpass elements into a three-column DataFrame.

    Each element becomes one row holding an ID built from its ``type`` and
    ``id`` fields, its ``name`` tag (empty string when absent) and the
    category computed by ``determine_location_type``.

    Args:
        elements: Element dicts as found in an Overpass JSON response.

    Returns:
        A DataFrame with columns 'ID', 'Location Name' and 'Location Type';
        an empty frame with those columns when ``elements`` is empty.
    """

    def to_record(item: Dict) -> Dict[str, str]:
        # Elements without any tags are still listed, with an empty name.
        item_tags = item.get('tags', {})
        return {
            'ID': f"{item['type']}_{item['id']}",
            'Location Name': item_tags.get('name', ''),
            'Location Type': determine_location_type(item_tags),
        }

    records = [to_record(item) for item in elements]
    if not records:
        # An empty record list would otherwise yield a frame with no columns.
        return pd.DataFrame(columns=['ID', 'Location Name', 'Location Type'])
    return pd.DataFrame(records)
def get_osm_data(lat: float, lon: float, radius: int) -> pd.DataFrame:
    """Fetch the OSM elements around a point and return them as a DataFrame.

    Convenience wrapper that feeds the result of ``fetch_osm_data`` straight
    into ``parse_osm_data``.

    Args:
        lat: Latitude of the search centre, in decimal degrees.
        lon: Longitude of the search centre, in decimal degrees.
        radius: Search radius forwarded to ``fetch_osm_data``.

    Returns:
        The DataFrame produced by ``parse_osm_data``.
    """
    return parse_osm_data(fetch_osm_data(lat, lon, radius))
# Matches e.g. 19°03'08.6"N 72°54'06.0"E; compiled once at import time.
# The leading \s* lets pasted input with stray leading blanks still parse.
_DMS_PATTERN = re.compile(
    r'\s*(\d+)°(\d+)\'([\d.]+)"([NS])\s*(\d+)°(\d+)\'([\d.]+)"([EW])'
)


def dms_to_decimal(coord_str: str) -> Tuple[float, float]:
    """Convert a degrees/minutes/seconds coordinate pair to decimal degrees.

    The string must hold a latitude followed by a longitude, each written as
    degrees, minutes, seconds and a hemisphere letter, for example
    19°03'08.6"N 72°54'06.0"E. Leading whitespace is ignored. Matching is
    anchored at the start only, so text after the longitude is not checked.

    Args:
        coord_str: The coordinate pair in DMS notation.

    Returns:
        ``(latitude, longitude)`` in decimal degrees; southern latitudes and
        western longitudes are negative.

    Raises:
        ValueError: If ``coord_str`` does not start with a coordinate pair in
            the expected format, or a seconds field is not a valid number.
    """
    match = _DMS_PATTERN.match(coord_str)
    if not match:
        raise ValueError("Invalid coordinate format. Expected format: 19°03'08.6\"N 72°54'06.0\"E")

    lat_deg, lat_min, lat_sec, lat_dir, lon_deg, lon_min, lon_sec, lon_dir = match.groups()

    # Convert to decimal degrees
    lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
    lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600

    # Adjust sign based on direction
    if lat_dir == 'S':
        lat = -lat
    if lon_dir == 'W':
        lon = -lon

    return lat, lon
def calculate_distant_points(
    lat: float, lon: float, distance: float
) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    """Return the points ``distance`` metres east and north of a location.

    Uses a spherical Earth of radius 6,371,000 m. The eastward offset is
    scaled by the cosine of the starting latitude; the northward offset is
    the plain arc length converted to degrees. Results are not wrapped or
    clamped to valid latitude/longitude ranges.

    Args:
        lat: Starting latitude in decimal degrees.
        lon: Starting longitude in decimal degrees.
        distance: Offset in metres (negative values move west/south).

    Returns:
        ``((lat, lon_east), (lat_north, lon))``: first the point at the same
        latitude shifted in longitude, then the point at the same longitude
        shifted in latitude.
    """
    # Earth's radius in meters
    earth_radius = 6371000

    # East-west: a degree of longitude shrinks with cos(latitude), so the
    # angular step for a fixed distance grows accordingly.
    lat_rad = math.radians(lat)
    delta_lon = distance / (earth_radius * math.cos(lat_rad))
    shifted_lon = lon + math.degrees(delta_lon)

    # North-south: arc length over radius gives the angle directly.
    delta_lat = distance / earth_radius
    shifted_lat = lat + math.degrees(delta_lat)

    return ((lat, shifted_lon), (shifted_lat, lon))
| ## 2d map grid (0,0) --> bottom left | |
def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, float], rows: int, cols: int) -> List[List[Tuple[float, float]]]:
    """Split a lat/lon bounding box into cells and return the cell centres.

    The box is divided into ``rows`` equal latitude bands and ``cols`` equal
    longitude bands. Entry ``[i][j]`` of the result is the centre of the cell
    in band ``i`` counted from ``bottom_left``'s latitude and band ``j``
    counted from ``bottom_left``'s longitude, so ``[0][0]`` is the cell
    touching ``bottom_left``.

    Args:
        bottom_left: ``(lat, lon)`` of the box's starting corner.
        top_right: ``(lat, lon)`` of the opposite corner.
        rows: Number of latitude bands.
        cols: Number of longitude bands.

    Returns:
        A list of ``rows`` lists, each holding ``cols`` ``(lat, lon)`` tuples.

    Raises:
        ZeroDivisionError: If ``rows`` or ``cols`` is zero.
    """
    origin_lat, origin_lon = bottom_left[0], bottom_left[1]
    cell_height = (top_right[0] - origin_lat) / rows
    cell_width = (top_right[1] - origin_lon) / cols

    # Centre coordinate of every band: band start plus half a cell.
    centre_lats = [origin_lat + r * cell_height + cell_height / 2 for r in range(rows)]
    centre_lons = [origin_lon + c * cell_width + cell_width / 2 for c in range(cols)]

    return [
        [(centre_lat, centre_lon) for centre_lon in centre_lons]
        for centre_lat in centre_lats
    ]
| ## entire pipeline | |
| # left_lat = 18.889833 | |
| # left_lon = 72.779844 | |
| # dist = 35 | |
def input_filter(lat=None, lon=None, string=None):
    """Normalise the two supported coordinate inputs to a ``(lat, lon)`` pair.

    Explicit numeric coordinates take precedence over the DMS string.

    Args:
        lat: Latitude in decimal degrees, or None.
        lon: Longitude in decimal degrees. Returned as given whenever ``lat``
            is supplied, including when it is None.
        string: Coordinate pair in DMS notation, parsed with
            ``dms_to_decimal`` only when ``lat`` is None.

    Returns:
        ``(lat, lon)`` when ``lat`` is given, the parsed pair when only
        ``string`` is given, otherwise None.

    Raises:
        ValueError: If ``string`` is used and ``dms_to_decimal`` rejects it.
    """
    # Identity checks, so a legitimate latitude of 0 still counts as given.
    if lat is not None:
        return (lat, lon)
    if string is not None:
        latitude, longitude = dms_to_decimal(string)
        return (latitude, longitude)
    return None
def get_data(bottom_left_lat, bottom_left_lon, dist, radius=710):
    """Build a per-cell text description of the OSM places in a square area.

    The area starts at the given bottom-left corner and extends ``dist``
    kilometres east and north. It is split into ``dist`` x ``dist`` cells and
    every cell centre is queried through ``get_osm_data``. Places typed
    'Other', 'Religious' or 'Transportation' are dropped, duplicates (same
    name and type) are collapsed, and the rest are rendered as
    ``"<name> is a <type>; "`` fragments joined into one string.

    Args:
        bottom_left_lat: Latitude of the area's bottom-left corner.
        bottom_left_lon: Longitude of the area's bottom-left corner.
        dist: Side length of the area in kilometres; also used as the number
            of grid rows and columns, so it must be usable by ``range``.
        radius: Search radius handed to ``get_osm_data`` for each cell. Each
            cell is about 1000 m per side, so its half-diagonal is roughly
            707 m and the default of 710 reaches the cell's corners.

    Returns:
        A DataFrame with one row per cell and the columns 'row', 'col',
        'latitude', 'longitude' and 'Map Data'.
    """
    excluded_types = ['Other', 'Religious', 'Transportation']

    east_point, north_point = calculate_distant_points(bottom_left_lat, bottom_left_lon, 1000 * dist)
    top_right_lat = north_point[0]
    top_right_lon = east_point[1]
    grid = create_map_grid((bottom_left_lat, bottom_left_lon), (top_right_lat, top_right_lon), dist, dist)

    grid_dataset = []
    for i, grid_row in enumerate(grid):
        for j, point in enumerate(grid_row):
            result_df = get_osm_data(point[0], point[1], radius)
            labelled_df = result_df[~result_df['Location Type'].isin(excluded_types)]

            # dict.fromkeys keeps the first occurrence of each (name, type)
            # pair and preserves the order in which pairs were seen.
            unique_places = dict.fromkeys(
                zip(labelled_df['Location Name'], labelled_df['Location Type'])
            )
            row_of_dataset = ''.join(
                f'{place_name} is a {place_type}; '
                for place_name, place_type in unique_places
            )
            # Replace any comma in the text with a blank space.
            row_of_dataset = row_of_dataset.replace(',', ' ')

            grid_dataset.append({
                "row": i,
                "col": j,
                "latitude": point[0],
                "longitude": point[1],
                "Map Data": row_of_dataset,
            })

    return pd.DataFrame(grid_dataset)
| # grid_df.to_csv('MMR_DATASET.csv', index=False) |