Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import plotly.express as px | |
| import re | |
| from geopy.geocoders import Nominatim | |
| from geopy.exc import GeocoderInsufficientPrivileges | |
| import time | |
| from PIL import Image | |
| import base64 | |
# Configure the page: use the wide layout so content spans the browser width.
st.set_page_config(layout="wide")
# Helper for loading the background image.
def get_base64_of_bin_file(bin_file):
    """Return the contents of the file at *bin_file* as a base64 text string.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes are decoded to ``str``.
    """
    with open(bin_file, mode='rb') as handle:
        encoded = base64.b64encode(handle.read())
    return encoded.decode()
def set_png_as_page_bg(png_file):
    """Use the image at *png_file* as the background of the Streamlit app.

    The file is base64-encoded via ``get_base64_of_bin_file`` and inlined as a
    ``data:image/png`` URI in a ``<style>`` rule targeting ``.stApp``; the rule
    is injected with ``st.markdown(..., unsafe_allow_html=True)``.
    """
    css_template = '''
<style>
.stApp {
background-image: url("data:image/png;base64,%s");
background-size: cover;
}
</style>
'''
    encoded_image = get_base64_of_bin_file(png_file)
    st.markdown(css_template % encoded_image, unsafe_allow_html=True)
# Apply the page background image.
set_png_as_page_bg('dog_background.png')

# Initialise the geocoder client used by get_lat_lon.
geolocator = Nominatim(user_agent="my_unique_app/3.0")
# Coordinates already resolved during this run, keyed by district name, so a
# district shared by many clinics is only sent to the geocoder once.
_GEOCODE_CACHE = {}


# Convert a district name into latitude/longitude.
def get_lat_lon(district):
    """Return ``(latitude, longitude)`` for a Tainan district.

    The query sent to the geocoder is ``"台南市" + district``.

    Args:
        district: District name, e.g. ``"東區"``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        geocoder finds nothing or the request fails.

    Geocoder errors never propagate: a privilege error is reported with
    ``st.error`` and any other geopy error with ``st.warning``.
    """
    # Local import: only GeocoderInsufficientPrivileges is imported at file level.
    from geopy.exc import GeopyError

    if district in _GEOCODE_CACHE:
        return _GEOCODE_CACHE[district]

    try:
        # Explicit timeout instead of relying on the library default.
        location = geolocator.geocode(f"台南市{district}", timeout=10)
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeopyError as exc:
        # Timeouts and other service errors are not cached, so a later call
        # can retry the same district.
        st.warning(f"無法取得 台南市{district} 的經緯度:{exc}")
        return None, None
    finally:
        # Pause after every request (successful or not) to avoid rate limiting.
        time.sleep(1)

    if location is not None:
        coords = (location.latitude, location.longitude)
    else:
        coords = (None, None)
    _GEOCODE_CACHE[district] = coords
    return coords
# Captures the district (e.g. "東區") that follows the city name in an address.
# Non-greedy so a later "區" in the same address is not swallowed; accepts both
# spellings of the city name.
_DISTRICT_PATTERN = re.compile(r'[台臺]南市\s*(\S+?區)')


def _tag_text(tag):
    """Return the stripped text of *tag*, or "N/A" when the tag is missing."""
    return tag.get_text(strip=True) if tag is not None else "N/A"


def _nth_list_item(clinic, position):
    """Return the <li> reached from the first <li> by *position* sibling hops, or None."""
    item = clinic.find('li')
    for _ in range(position):
        if item is None:
            return None
        item = item.find_next_sibling('li')
    return item


def _parse_rating(text):
    """Convert rating text to a float; missing or non-numeric text becomes 0."""
    try:
        return float(text)
    except ValueError:
        return 0.0


def _parse_clinic(clinic):
    """Build one clinic record (dict) from a clinic header element."""
    name = _tag_text(clinic.find('strong'))
    rating = _tag_text(clinic.find('span', class_='t-font-large'))

    # Phone lives in the second <li>, address in the third; either may be absent.
    phone_item = _nth_list_item(clinic, 1)
    phone_tag = phone_item.find('span', class_='t-font-medium') if phone_item is not None else None
    address_item = _nth_list_item(clinic, 2)
    address_tag = address_item.find('span') if address_item is not None else None

    address = _tag_text(address_tag)
    match = _DISTRICT_PATTERN.search(address)
    return {
        '診所名稱': name,
        '推薦度': _parse_rating(rating),
        '電話': _tag_text(phone_tag),
        '地址': address,
        '區域': match.group(1) if match else "未知區域",
    }


# Scrape the clinic listing.
def fetch_clinic_data():
    """Scrape clinic listings (pages 1-5) into a DataFrame.

    Returns:
        A DataFrame with the columns 診所名稱, 推薦度, 電話, 地址 and 區域. The
        columns are present even when no clinic could be scraped.

    Pages that fail to load or do not answer with HTTP 200 are skipped.
    """
    columns = ['診所名稱', '推薦度', '電話', '地址', '區域']
    clinic_data = []
    for page in range(1, 6):  # limited page count to keep testing fast
        url = f"https://www.tw-animal.com/list/pet/17/{page}.html"
        try:
            # A timeout keeps a stalled connection from hanging the app.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            st.warning(f"無法載入第 {page} 頁:{exc}")
            continue
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        clinics = soup.find_all('div', class_='m-clinic__header t-clinic__header')
        clinic_data.extend(_parse_clinic(clinic) for clinic in clinics)
    return pd.DataFrame(clinic_data, columns=columns)
# Streamlit App
st.title("動物醫院資料分析")

# Streamlit re-runs this script on every widget interaction. The scraped and
# geocoded table is therefore kept in session state so that moving the slider
# does not trigger another scrape and another round of geocoding.
if 'clinic_df' not in st.session_state:
    # Fetch the data.
    df = fetch_clinic_data()

    # Progress indicator for the geocoding step.
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Coordinates are looked up by district, so geocode each distinct district
    # once instead of once per clinic row.
    district_column = df['區域'] if '區域' in df.columns else pd.Series(dtype=object)
    districts = district_column.unique()
    total = len(districts)
    coords = {}
    for i, district in enumerate(districts):
        coords[district] = get_lat_lon(district)
        progress = (i + 1) / total
        progress_bar.progress(progress)
        status_text.text(f"處理進度: {int(progress*100)}%")

    # Remove the progress bar and status text.
    progress_bar.empty()
    status_text.empty()

    df['緯度'] = [coords[district][0] for district in district_column]
    df['經度'] = [coords[district][1] for district in district_column]

    # Drop clinics whose district could not be geocoded.
    df = df.dropna(subset=['緯度', '經度'])

    if df.empty:
        # Nothing to analyse; stop here without caching so a rerun retries.
        st.warning("目前無法取得診所資料,請稍後再試。")
        st.stop()

    st.session_state['clinic_df'] = df

df = st.session_state['clinic_df']
# Rating range filter: keep clinics whose rating lies inside the selected
# (inclusive) range.
rating_filter = st.slider(
    "選擇推薦度範圍",
    min_value=0.0,
    max_value=5.0,
    value=(0.0, 5.0),
    step=0.1,
)
lowest_rating, highest_rating = rating_filter
at_least_lowest = df['推薦度'] >= lowest_rating
at_most_highest = df['推薦度'] <= highest_rating
filtered_df = df[at_least_lowest & at_most_highest]

# Show the filtered result.
st.write(f"共有 {len(filtered_df)} 個診所符合推薦度範圍")
st.dataframe(filtered_df)
# Statistics
# 1. Rating distribution
st.header("推薦度分佈分析")

# Build the histogram and the box plot of the rating column, then render them
# in that order.
fig_histogram = px.histogram(
    df,
    x="推薦度",
    nbins=10,
    title="推薦度直方圖",
    color_discrete_sequence=['#7DAFF2'],
)
fig_boxplot = px.box(
    df,
    y="推薦度",
    title="推薦度箱線圖",
    color_discrete_sequence=['#FFB6C1'],
)
for rating_figure in (fig_histogram, fig_boxplot):
    st.plotly_chart(rating_figure)
# 2. Geographic distribution
st.header("地理分佈分析")

# Plot each clinic at the coordinates stored in its 緯度/經度 columns, coloured
# by district and sized by rating.
geo_scatter_options = {
    "lat": "緯度",
    "lon": "經度",
    "hover_name": "診所名稱",
    "hover_data": ["推薦度", "電話", "地址"],
    "color": "區域",
    "size": "推薦度",
    "zoom": 10,
    "height": 600,
    "title": "動物醫院地理分佈",
}
fig_geo_dist = px.scatter_mapbox(df, **geo_scatter_options)
fig_geo_dist.update_layout(mapbox_style="open-street-map")
st.plotly_chart(fig_geo_dist)
# 3. Relationship between rating and location
st.header("推薦度與地理位置的關聯性")

# Mean rating per district, shown as a bar chart.
mean_rating_by_district = df.groupby("區域")["推薦度"].mean().reset_index()
fig_bar = px.bar(
    mean_rating_by_district,
    x="區域",
    y="推薦度",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
st.plotly_chart(fig_bar)

# Density map weighted by rating to show regional differences on the map.
fig_heatmap = px.density_mapbox(
    df,
    lat="緯度",
    lon="經度",
    z="推薦度",
    radius=10,
    mapbox_style="open-street-map",
    zoom=10,
    title="動物醫院推薦度熱力圖",
)
st.plotly_chart(fig_heatmap)