"""Streamlit dashboard for veterinary clinics listed on tw-animal.com.

The script scrapes the clinic listing pages, derives each clinic's district
from its address, geocodes the districts with Nominatim, and renders rating
and geographic charts with Plotly.
"""

import base64
import re
import time
from typing import Optional, Tuple

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup
from geopy.exc import GeocoderInsufficientPrivileges
from geopy.exc import GeocoderServiceError
from geopy.geocoders import Nominatim
from PIL import Image

# Page configuration (must be the first Streamlit call).
st.set_page_config(layout="wide")

# Placeholder used when a field cannot be scraped.
NOT_AVAILABLE = "N/A"
# District label used when the address does not contain a recognisable district.
UNKNOWN_DISTRICT = "未知區域"
# Column order of the scraped table; also used to build an empty table.
CLINIC_COLUMNS = ['診所名稱', '推薦度', '電話', '地址', '區域']
# Non-greedy so that a later "區" in the street part (e.g. "社區") is not
# swallowed into the district name; accepts both 台南市 and 臺南市.
DISTRICT_RE = re.compile(r'[台臺]南市\s*(\S+?區)')
# Seconds to wait for each HTTP / geocoding request.
REQUEST_TIMEOUT_SECONDS = 10
# Pause after every geocoding request to stay under the service rate limit.
GEOCODE_DELAY_SECONDS = 1


def get_base64_of_bin_file(bin_file):
    """Return the contents of ``bin_file`` as a base64-encoded ``str``."""
    with open(bin_file, 'rb') as f:
        data = f.read()
    return base64.b64encode(data).decode()


def set_png_as_page_bg(png_file):
    """Inject CSS that uses ``png_file`` as the page background.

    Raises:
        OSError: if ``png_file`` cannot be opened.
    """
    bin_str = get_base64_of_bin_file(png_file)
    # NOTE(review): the template in the source was blank (no ``%s``), which
    # made the ``%`` formatting raise TypeError. This CSS is a reconstruction;
    # confirm the selector against the intended design.
    page_bg_img = '''
    <style>
    .stApp {
        background-image: url("data:image/png;base64,%s");
        background-size: cover;
    }
    </style>
    ''' % bin_str
    st.markdown(page_bg_img, unsafe_allow_html=True)


# Set the background; a missing image is cosmetic and must not stop the app.
try:
    set_png_as_page_bg('dog_background.png')
except FileNotFoundError:
    st.warning("找不到背景圖片 dog_background.png,將使用預設背景。")

# Initialise the geocoder. geopy's default timeout is short, so raise it.
geolocator = Nominatim(
    user_agent="my_unique_app/3.0",
    timeout=REQUEST_TIMEOUT_SECONDS,
)


@st.cache_data
def get_lat_lon(district) -> Tuple[Optional[float], Optional[float]]:
    """Geocode a Tainan district name.

    Args:
        district: District name such as ``"東區"``.

    Returns:
        ``(latitude, longitude)``, or ``(None, None)`` when the district is
        the unknown placeholder, is not found, or the geocoder fails.
    """
    if district == UNKNOWN_DISTRICT:
        # The placeholder is not a real place; do not send it to the service.
        return None, None
    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeocoderServiceError as exc:
        # Covers timeouts, rate limiting and service outages.
        st.warning(f"無法取得 {district} 的經緯度: {exc}")
        return None, None
    finally:
        # Throttle after every request, successful or not.
        time.sleep(GEOCODE_DELAY_SECONDS)
    if location:
        return location.latitude, location.longitude
    return None, None


def _text_or_na(tag):
    """Return the stripped text of ``tag``, or the N/A placeholder if absent."""
    return tag.get_text(strip=True) if tag else NOT_AVAILABLE


def _nth_list_item(clinic, position):
    """Return the ``<li>`` ``position`` siblings after the first, or ``None``."""
    item = clinic.find('li')
    for _ in range(position):
        if item is None:
            return None
        item = item.find_next_sibling('li')
    return item


def _parse_rating(text):
    """Convert scraped rating text to ``float``; unparsable text becomes 0."""
    if text == NOT_AVAILABLE:
        return 0
    try:
        return float(text)
    except ValueError:
        return 0


def _extract_district(address):
    """Return the district found in ``address`` or the unknown placeholder."""
    match = DISTRICT_RE.search(address)
    return match.group(1) if match else UNKNOWN_DISTRICT


def _parse_clinic(clinic):
    """Build one table row (a dict) from a clinic header element."""
    name = _text_or_na(clinic.find('strong'))
    rating = _text_or_na(clinic.find('span', class_='t-font-large'))

    # The phone number is in the second <li>, the address in the third.
    phone_item = _nth_list_item(clinic, 1)
    phone_tag = (
        phone_item.find('span', class_='t-font-medium')
        if phone_item is not None else None
    )
    address_item = _nth_list_item(clinic, 2)
    address_tag = address_item.find('span') if address_item is not None else None

    address = _text_or_na(address_tag)
    return {
        '診所名稱': name,
        '推薦度': _parse_rating(rating),
        '電話': _text_or_na(phone_tag),
        '地址': address,
        '區域': _extract_district(address),
    }


@st.cache_data
def fetch_clinic_data():
    """Scrape the clinic listing pages into a DataFrame.

    Pages that fail to download or return a non-200 status are skipped.

    Returns:
        A DataFrame with the columns in ``CLINIC_COLUMNS``; it has those
        columns even when no clinic could be scraped.
    """
    clinic_data = []
    for page in range(1, 6):  # reduced page count to speed up testing
        url = f"https://www.tw-animal.com/list/pet/17/{page}.html"
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException:
            continue
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        clinics = soup.find_all(
            'div', class_='m-clinic__header t-clinic__header'
        )
        clinic_data.extend(_parse_clinic(clinic) for clinic in clinics)
    return pd.DataFrame(clinic_data, columns=CLINIC_COLUMNS)


# Streamlit App
st.title("動物醫院資料分析")

# Fetch the data.
df = fetch_clinic_data()
if df.empty:
    st.warning("目前無法取得動物醫院資料,請稍後再試。")
    st.stop()

# Progress bar.
progress_bar = st.progress(0)
status_text = st.empty()

# Geocode each distinct district once and map the result back onto the rows.
districts = df['區域'].unique()
total = len(districts)
lat_by_district = {}
lon_by_district = {}
for i, district in enumerate(districts):
    lat, lon = get_lat_lon(district)
    lat_by_district[district] = lat
    lon_by_district[district] = lon
    progress = (i + 1) / total
    progress_bar.progress(progress)
    status_text.text(f"處理進度: {int(progress*100)}%")
df['緯度'] = pd.to_numeric(df['區域'].map(lat_by_district), errors='coerce')
df['經度'] = pd.to_numeric(df['區域'].map(lon_by_district), errors='coerce')

# Remove the progress bar and the status text.
progress_bar.empty()
status_text.empty()

# Drop rows whose coordinates could not be resolved.
df = df.dropna(subset=['緯度', '經度'])

# Rating range filter.
rating_filter = st.slider("選擇推薦度範圍", 0.0, 5.0, (0.0, 5.0), 0.1)
filtered_df = df[
    (df['推薦度'] >= rating_filter[0]) & (df['推薦度'] <= rating_filter[1])
]

# Show the filtered result.
st.write(f"共有 {len(filtered_df)} 個診所符合推薦度範圍")
st.dataframe(filtered_df)

# Statistics
# NOTE(review): the charts below use the unfiltered ``df``, not
# ``filtered_df`` — confirm whether the slider should affect them.

# 1. Rating distribution
st.header("推薦度分佈分析")

# Histogram
fig_histogram = px.histogram(
    df, x="推薦度", nbins=10, title="推薦度直方圖",
    color_discrete_sequence=['#7DAFF2'],
)
st.plotly_chart(fig_histogram)

# Box plot
fig_boxplot = px.box(
    df, y="推薦度", title="推薦度箱線圖",
    color_discrete_sequence=['#FFB6C1'],
)
st.plotly_chart(fig_boxplot)

# 2. Geographic distribution
st.header("地理分佈分析")

# Plot clinics at the coordinates of their district.
fig_geo_dist = px.scatter_mapbox(
    df, lat="緯度", lon="經度",
    hover_name="診所名稱",
    hover_data=["推薦度", "電話", "地址"],
    color="區域",
    size="推薦度",
    zoom=10,
    height=600,
    title="動物醫院地理分佈",
)
fig_geo_dist.update_layout(mapbox_style="open-street-map")
st.plotly_chart(fig_geo_dist)

# 3. Relationship between rating and location
st.header("推薦度與地理位置的關聯性")

# Mean rating per district
fig_bar = px.bar(
    df.groupby("區域")["推薦度"].mean().reset_index(),
    x="區域", y="推薦度",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
st.plotly_chart(fig_bar)

# Heat map of ratings by location
fig_heatmap = px.density_mapbox(
    df, lat="緯度", lon="經度", z="推薦度",
    radius=10,
    mapbox_style="open-street-map",
    zoom=10,
    title="動物醫院推薦度熱力圖",
)
st.plotly_chart(fig_heatmap)