# Streamlit app (Hugging Face Spaces): 寵物醫院評分查詢 — pet-hospital rating lookup
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import plotly.express as px | |
| import base64 | |
| import folium | |
| from streamlit_folium import st_folium | |
| from geopy.geocoders import Nominatim | |
| from geopy.exc import GeocoderTimedOut, GeocoderServiceError | |
| import time | |
| from folium.plugins import MarkerCluster # 新增此行用於標記聚合 | |
# Helper: paint a full-page background image behind the Streamlit app.
def set_background(png_file):
    """Set *png_file* (a PNG on disk) as the app's CSS background.

    The image is read once, base64-encoded, and embedded in an inline
    ``<style>`` tag so no static-file hosting is required.
    """
    with open(png_file, "rb") as image_fh:
        encoded = base64.b64encode(image_fh.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/png;base64,{encoded});
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
# Apply the background image (CAT.png must sit next to this script).
set_background('CAT.png')

# App title.
st.title("寵物醫院評分查詢")

# User-selected minimum rating threshold (range 1.0–5.0, default 3.5).
min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)

# Hospital detail pages to scrape.
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    # ... other URLs ...
]

# Accumulates one dict per hospital that passes the rating filter.
data_list = []

# Geocoder used to turn street addresses into latitude/longitude pairs.
# NOTE(review): "geoapiExercises" is a generic user_agent; Nominatim's usage
# policy asks for an app-specific one — consider changing it.
geolocator = Nominatim(user_agent="geoapiExercises")
geocode_cache = {}  # simple in-memory cache: address -> geopy Location
# Geocode an address with retry (transient failures only) and caching.
def geocode_address(address, retries=5, delay=5):
    """Resolve *address* to a geopy Location, with caching and retries.

    Args:
        address: Street-address string to look up.
        retries: Max attempts when the service times out or errors.
        delay: Seconds to sleep between retry attempts.

    Returns:
        A geopy ``Location`` on success, or ``None`` when the address
        cannot be resolved (not found, or the service kept failing).
    """
    if address in geocode_cache:
        return geocode_cache[address]
    for _ in range(retries):
        try:
            location = geolocator.geocode(address)
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            # Transient failure: warn, back off, and try again.
            st.warning(f"地理編碼錯誤: {e}. 重試中...")
            time.sleep(delay)
            continue
        if location:
            geocode_cache[address] = location
            return location
        # The service answered but found no match — retrying the exact same
        # query cannot help, so stop immediately. (Bug fix: the original
        # looped `retries` times, with no delay, on a definitive "not found",
        # hammering the rate-limited Nominatim service.)
        break
    st.warning(f"無法地理編碼地址: {address}")
    return None
# --- Main flow -----------------------------------------------------------
# Scraped results are kept in st.session_state so they survive Streamlit's
# script reruns. (Bug fix: the original nested the「顯示地圖」button inside
# the「開始爬取資料」button's block — clicking the inner button reran the
# script, the outer button then read False, and the map could never render.)
if st.button('開始爬取資料'):
    st.write("正在爬取資料,請稍候...")
    # Crawl each detail page and keep hospitals at or above the threshold.
    for url in urls:
        try:
            # Timeout so one unresponsive server cannot hang the app;
            # raise_for_status surfaces HTTP errors (404/500) explicitly.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            title = soup.find('h1', class_='t-intro__title').get_text(strip=True)
            phone = soup.find('a', class_='t-font-large').get_text(strip=True)
            address = soup.find('a', class_='t-font-medium').get_text(strip=True)
            rating = float(soup.find('span', class_='t-intro__recommand').get_text(strip=True))
        except (requests.RequestException, AttributeError, ValueError) as e:
            # One broken/missing page should not abort the whole crawl;
            # AttributeError covers find() returning None, ValueError a
            # non-numeric rating.
            st.warning(f"無法爬取 {url}: {e}")
            continue
        if rating >= min_rating:
            location = geocode_address(address)
            if location:
                data_list.append({
                    "標題": title,
                    "手機": phone,
                    "地址": address,
                    "評分": rating,
                    "經度": location.longitude,
                    "緯度": location.latitude
                })
    if data_list:
        st.session_state['pet_df'] = pd.DataFrame(data_list)
    else:
        st.session_state.pop('pet_df', None)
        st.info("沒有符合條件的資料。")

# Render results whenever scraped data is available (survives reruns).
if 'pet_df' in st.session_state:
    df1 = st.session_state['pet_df']
    # Derive a "district" column from the address.
    # NOTE(review): assumes the district is the first whitespace-separated
    # token of the address — TODO confirm against the site's address format.
    df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])
    # One row per district: hospitals joined with " | ", ratings averaged.
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean'  # average rating per district
    }).reset_index()

    # Raw results table.
    st.dataframe(df1)
    # Bar chart: average rating per district.
    bar_fig = px.bar(grouped_df, x='區域', y='評分', title="各區域寵物醫院統計", labels={'評分': '平均評分', '區域': '區域'})
    st.plotly_chart(bar_fig)
    # Pie chart: rating share per district.
    pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
    st.plotly_chart(pie_fig)

    # Checkbox (not a button) so the map stays visible: st_folium map
    # interactions also rerun the script, which would reset a button.
    if st.checkbox('顯示地圖'):
        # Center the map on the mean coordinates of all hospitals.
        map_center = [df1['緯度'].mean(), df1['經度'].mean()]
        pet_map = folium.Map(location=map_center, zoom_start=12)
        # Cluster nearby markers to keep the map readable.
        marker_cluster = MarkerCluster().add_to(pet_map)
        for _, row in df1.iterrows():
            folium.Marker(
                location=[row['緯度'], row['經度']],
                popup=f"{row['標題']} (評分: {row['評分']})",
                tooltip=row['標題']
            ).add_to(marker_cluster)
        # Render via streamlit_folium.
        st_folium(pet_map, width=700, height=500)