import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

# Scrape store details (name, address, phone, description) from the restaurant
# pages listed in a Google Sheet, geocode every address with Nominatim, and
# collect the results in the DataFrame ``df``.

# Column order of the resulting DataFrame.
COLUMNS = ["Store Name", "Address", "Phone", "Description", "Latitude", "Longitude"]

# Seconds to wait for a page before giving up. Without an explicit timeout
# ``requests.get`` may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

# NOTE(review): the public Nominatim service expects a user agent that
# identifies the application; this generic value may be rejected - confirm.
geolocator = Nominatim(user_agent="geoapiExercises")

# Space geocoding calls at least one second apart (the public Nominatim
# service asks for at most one request per second). Exceptions are re-raised
# after the limiter's retries so the failure can be reported per address.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False)


def _text_or_none(tag):
    """Return the stripped text of *tag*, or None when the tag is missing."""
    return tag.text.strip() if tag is not None else None


def _fetch_soup(url):
    """Download *url* and return its parsed HTML, or None if the request fails."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return BeautifulSoup(response.content, "html.parser")


def _parse_store(soup):
    """Extract the store fields from a parsed page; absent fields become None."""
    phone_link = soup.find("a", {"data-event": "CTA_tel"})
    href = phone_link.get("href") if phone_link is not None else None
    description_box = soup.find("div", class_="restaurant-details__description--text")
    paragraph = description_box.find("p") if description_box is not None else None
    return {
        "Store Name": _text_or_none(
            soup.find("h2", class_="restaurant-details__heading--title")
        ),
        "Address": _text_or_none(
            soup.find("li", class_="restaurant-details__heading--address")
        ),
        "Phone": href.replace("tel:", "") if href is not None else None,
        "Description": _text_or_none(paragraph),
    }


def _lookup_coordinates(address):
    """Return ``(latitude, longitude)`` for *address*, or ``(None, None)``."""
    if not address:
        return None, None
    try:
        location = geocode(address)
    except Exception as e:
        # Geocoding is best-effort: report the failure and keep the row.
        print(f"Error getting geocode for address {address}: {e}")
        return None, None
    if location is None:
        return None, None
    return location.latitude, location.longitude


sheet_id = "1IywohlfSnpPND45mUZQM8F1r7JNUiZzC9ZyH9jB7ufc"
df_urls = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Empty cells in the sheet are read as NaN; drop them so only real URLs are fetched.
urls = df_urls["URL"].dropna().tolist()

# Accumulate plain dicts and build the DataFrame once at the end; calling
# pd.concat on every iteration copies the whole frame each time.
rows = []
for url in urls:
    soup = _fetch_soup(url)
    if soup is None:
        # Keep one row per URL even when the page could not be retrieved.
        row = dict.fromkeys(COLUMNS[:4])
    else:
        row = _parse_store(soup)
    row["Latitude"], row["Longitude"] = _lookup_coordinates(row["Address"])
    rows.append(row)

df = pd.DataFrame(rows, columns=COLUMNS)

print(df)