# 20240821B / app.py
# sidcww's picture
# Update app.py
# 368bc05 verified
import requests
from bs4 import BeautifulSoup
import pandas as pd
from geopy.geocoders import Nominatim
# Scrape restaurant detail pages listed in a Google Sheet, geocode each
# address with Nominatim, and print the collected data as a DataFrame.

# Seconds to wait for a restaurant page before giving up on that URL.
REQUEST_TIMEOUT_SECONDS = 10
# Seconds to wait for a geocoding response before giving up on that address.
GEOCODE_TIMEOUT_SECONDS = 10

# Output columns, in display order.
COLUMNS = ["Store Name", "Address", "Phone", "Description", "Latitude", "Longitude"]

# Initialise geopy's Nominatim geolocator.
# NOTE(review): "geoapiExercises" looks like a tutorial placeholder; Nominatim's
# usage policy asks for a user agent that identifies the application — confirm
# and replace with a project-specific value.
geolocator = Nominatim(user_agent="geoapiExercises")

# Read the list of URLs from Google Sheets (exported as CSV).
sheet_id = "1IywohlfSnpPND45mUZQM8F1r7JNUiZzC9ZyH9jB7ufc"  # replace with the actual sheet ID
df_urls = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# The sheet is expected to have a column named 'URL'; fail early with a clear
# message instead of an opaque lookup error.
if "URL" not in df_urls.columns:
    raise KeyError(f"Expected a 'URL' column in the sheet, found: {list(df_urls.columns)}")

# Blank cells are read as NaN; drop them so they are never sent to requests.
urls = df_urls["URL"].dropna().tolist()

# Collect one dict per restaurant and build the DataFrame once at the end.
# Repeated pd.concat inside the loop copies the whole frame every iteration
# and warns on newer pandas when one of the inputs is empty.
rows = []

# Process each URL in turn.
for url in urls:
    # A timeout keeps one unresponsive site from hanging the whole run, and
    # any request failure skips just this URL instead of aborting the script.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        continue

    soup = BeautifulSoup(response.content, "html.parser")

    # Try to extract each field. soup.find() returns None when the element is
    # missing, so the chained attribute access raises AttributeError; in that
    # case the field is recorded as None.
    try:
        store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
    except AttributeError:
        store_name = None

    try:
        address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
    except AttributeError:
        address = None

    try:
        # The phone number is taken from the "tel:" link's href.
        phone = soup.find("a", {"data-event": "CTA_tel"}).get("href").replace("tel:", "")
    except AttributeError:
        phone = None

    try:
        description = soup.find("div", class_="restaurant-details__description--text").find("p").text.strip()
    except AttributeError:
        description = None

    # Use geopy to look up latitude/longitude; both stay None when there is
    # no address, no match, or the lookup fails.
    latitude, longitude = None, None
    if address:
        try:
            # Explicit timeout so slow responses are less likely to fail.
            location = geolocator.geocode(address, timeout=GEOCODE_TIMEOUT_SECONDS)
            if location:
                latitude = location.latitude
                longitude = location.longitude
        except Exception as e:
            # Best effort: report the failure and keep the row without coordinates.
            print(f"Error getting geocode for address {address}: {e}")

    rows.append({
        "Store Name": store_name,
        "Address": address,
        "Phone": phone,
        "Description": description,
        "Latitude": latitude,
        "Longitude": longitude,
    })

# Build the result table once; `columns` keeps the header even with no rows.
df = pd.DataFrame(rows, columns=COLUMNS)

# Output the DataFrame.
print(df)