# 20240821A / app.py
# Author: sidcww — "Update app.py" (commit be4e1e7, verified)
# NOTE: the lines above were Hugging Face Space page chrome pasted into the
# file; they are kept here as a comment so the script remains valid Python.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import streamlit as st
# Read the list of restaurant page URLs from a public Google Sheet.
SHEET_ID = "1IywohlfSnpPND45mUZQM8F1r7JNUiZzC9ZyH9jB7ufc"  # replace with the actual sheet ID
SHEET_CSV_URL = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv"


def _first_text(soup, tag, class_name):
    """Return the stripped text of the first <tag class=class_name>, or None if absent."""
    node = soup.find(tag, class_=class_name)
    return node.text.strip() if node is not None else None


def scrape_restaurant(url):
    """Fetch one restaurant page and extract its details.

    Returns a dict with keys "Store Name", "Address", "Phone", "Description"
    (each value is a string, or None when that field is missing on the page),
    or None when the page cannot be fetched at all.
    """
    try:
        # Timeout so one dead host cannot hang the whole Streamlit app;
        # raise_for_status turns 4xx/5xx responses into exceptions.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(response.content, "html.parser")

    store_name = _first_text(soup, "h2", "restaurant-details__heading--title")
    address = _first_text(soup, "li", "restaurant-details__heading--address")

    # The phone number is carried in a tel: link, e.g. href="tel:+886...".
    phone = None
    phone_link = soup.find("a", {"data-event": "CTA_tel"})
    if phone_link is not None and phone_link.get("href"):
        phone = phone_link.get("href").replace("tel:", "")

    # Description sits in a <p> nested inside the description <div>.
    description = None
    desc_div = soup.find("div", class_="restaurant-details__description--text")
    if desc_div is not None:
        paragraph = desc_div.find("p")
        if paragraph is not None:
            description = paragraph.text.strip()

    return {
        "Store Name": store_name,
        "Address": address,
        "Phone": phone,
        "Description": description,
    }


# The sheet is expected to expose the links in a column named 'URL';
# dropna() guards against blank rows in the sheet.
df_urls = pd.read_csv(SHEET_CSV_URL)
urls = df_urls["URL"].dropna().tolist()

# Collect one dict per successfully fetched restaurant, then build the
# DataFrame once at the end — pd.concat inside the loop is quadratic and
# concatenating onto an empty frame is deprecated by pandas.
rows = [row for row in (scrape_restaurant(u) for u in urls) if row is not None]
df = pd.DataFrame(rows, columns=["Store Name", "Address", "Phone", "Description"])

# Render the scraped table in the Streamlit UI.
st.title("餐廳資料表")
st.dataframe(df)