# testAHos_back / app.py
# Spencer525's picture
# Update app.py
# 6f06d71 verified
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import re
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderInsufficientPrivileges
import time
from PIL import Image
import base64
# Page configuration: use the "wide" layout.
st.set_page_config(layout="wide")
# Read the background image
def get_base64_of_bin_file(bin_file):
    """Return the contents of ``bin_file`` as a Base64-encoded string.

    The file is opened in binary mode and read in full; the Base64 bytes
    are then decoded into a ``str``.
    """
    with open(bin_file, mode='rb') as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def set_png_as_page_bg(png_file):
    """Use ``png_file`` as the app background.

    The image is embedded as a Base64 ``data:image/png`` URL in a ``<style>``
    rule targeting ``.stApp`` and injected through ``st.markdown`` with
    ``unsafe_allow_html=True``.
    """
    encoded_image = get_base64_of_bin_file(png_file)
    # Literal CSS braces are doubled because this is an f-string.
    background_css = f'''
<style>
.stApp {{
background-image: url("data:image/png;base64,{encoded_image}");
background-size: cover;
}}
</style>
'''
    st.markdown(background_css, unsafe_allow_html=True)
# Apply the background image.
set_png_as_page_bg('dog_background.png')
# Initialise the geocoder (Nominatim with a custom user agent); it is the
# module-level object that get_lat_lon() queries.
geolocator = Nominatim(user_agent="my_unique_app/3.0")
# Convert a district name into latitude/longitude
@st.cache_data
def get_lat_lon(district):
    """Geocode a Tainan district name to a ``(latitude, longitude)`` pair.

    The query sent to the geocoder is ``"台南市"`` followed by ``district``.
    The function is wrapped in ``st.cache_data``.

    Args:
        district: District name to look up.

    Returns:
        ``(latitude, longitude)`` when the geocoder returns a match;
        ``(None, None)`` when there is no match or the geocoder raised an
        error (an error/warning is shown in the app in that case).
    """
    # Local import: GeopyError is not among the file-level imports.
    from geopy.exc import GeopyError

    # NOTE(review): because of the cache decorator, a (None, None) produced by
    # a transient geocoder error is presumably cached as well -- confirm
    # whether failed lookups should be retried on the next run.
    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeopyError as err:
        # Timeouts / service errors must not take the whole app down.
        st.warning(f"無法取得「{district}」的經緯度:{err}")
        return None, None
    finally:
        # Throttle after EVERY request (hit, miss or error), not only after
        # successful ones, to stay within the geocoder's rate limit.
        time.sleep(1)

    if location:
        return location.latitude, location.longitude
    return None, None
# Data-scraping function and its private helpers

# Column order of the clinic table; also gives an empty result its columns.
_CLINIC_COLUMNS = ['診所名稱', '推薦度', '電話', '地址', '區域']
# Non-greedy so that only the text up to the FIRST "區" after the city name is
# captured; accepts both the "台" and "臺" spellings of the city.
_DISTRICT_PATTERN = re.compile(r'[台臺]南市(\S+?區)')
# Upper bound (seconds) for each page request so a stalled server cannot hang
# the app indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def _text_or_na(tag):
    """Return the stripped text of ``tag``, or ``"N/A"`` when it is missing."""
    return tag.get_text(strip=True) if tag else "N/A"


def _parse_rating(text):
    """Convert rating text to ``float``; non-numeric text (e.g. "N/A") is 0."""
    try:
        return float(text)
    except ValueError:
        return 0.0


def _parse_clinic(clinic):
    """Build one table row (a dict keyed by the column names) from a clinic tag."""
    name = _text_or_na(clinic.find('strong'))
    rating = _text_or_na(clinic.find('span', class_='t-font-large'))

    # Walk first <li> -> next <li> (phone) -> next <li> (address), tolerating
    # a missing item at any step instead of raising AttributeError on None.
    first_item = clinic.find('li')
    phone_item = first_item.find_next_sibling('li') if first_item else None
    address_item = phone_item.find_next_sibling('li') if phone_item else None

    phone_tag = (
        phone_item.find('span', class_='t-font-medium') if phone_item else None
    )
    address_tag = address_item.find('span') if address_item else None

    phone = _text_or_na(phone_tag)
    address = _text_or_na(address_tag)

    match = _DISTRICT_PATTERN.search(address)
    district = match.group(1) if match else "未知區域"

    return {
        '診所名稱': name,
        '推薦度': _parse_rating(rating),
        '電話': phone,
        '地址': address,
        '區域': district,
    }


@st.cache_data
def fetch_clinic_data(max_pages=5):
    """Scrape the clinic listing pages into a DataFrame.

    Pages ``1 .. max_pages`` of the listing are requested in order; pages that
    do not answer with HTTP 200 are skipped.

    Args:
        max_pages: Number of listing pages to request (default 5).

    Returns:
        A ``pandas.DataFrame`` with the columns 診所名稱, 推薦度, 電話, 地址 and
        區域. The columns are present even when no clinic was found.

    Raises:
        requests.RequestException: If a page request fails or times out.
    """
    clinic_data = []
    for page in range(1, max_pages + 1):
        url = f"https://www.tw-animal.com/list/pet/17/{page}.html"
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        clinics = soup.find_all('div', class_='m-clinic__header t-clinic__header')
        clinic_data.extend(_parse_clinic(clinic) for clinic in clinics)
    return pd.DataFrame(clinic_data, columns=_CLINIC_COLUMNS)
# Streamlit App
st.title("動物醫院資料分析")

# Fetch the clinic table
df = fetch_clinic_data()

# With no scraped rows the coordinate columns would never be created and the
# later dropna(subset=...) would raise KeyError, so stop early with a message.
if df.empty:
    st.warning("目前無法取得動物醫院資料,請稍後再試。")
    st.stop()

# Progress bar and status line shown while geocoding
progress_bar = st.progress(0)
status_text = st.empty()

# Geocoding: every clinic in a district gets that district's coordinates, so
# look each distinct district up once and map the result back onto the rows.
districts = df['區域'].unique()
district_coords = {}
for done, district in enumerate(districts, start=1):
    district_coords[district] = get_lat_lon(district)
    progress = done / len(districts)
    progress_bar.progress(progress)
    status_text.text(f"處理進度: {int(progress*100)}%")

# astype(float) turns the None of a failed lookup into NaN for dropna below.
df['緯度'] = df['區域'].map(lambda name: district_coords[name][0]).astype(float)
df['經度'] = df['區域'].map(lambda name: district_coords[name][1]).astype(float)

# Remove the progress bar and status text
progress_bar.empty()
status_text.empty()

# Drop rows whose coordinates could not be obtained
df = df.dropna(subset=['緯度', '經度'])
if df.empty:
    st.warning("無法取得任何診所的經緯度,無法繪製圖表。")
    st.stop()

# Rating range filter
rating_filter = st.slider("選擇推薦度範圍", 0.0, 5.0, (0.0, 5.0), 0.1)
lowest_rating, highest_rating = rating_filter
filtered_df = df[df['推薦度'].between(lowest_rating, highest_rating)]

# Show the filtered result
st.write(f"共有 {len(filtered_df)} 個診所符合推薦度範圍")
st.dataframe(filtered_df)

# Statistics
# NOTE(review): the charts below are drawn from the full table (df), not from
# filtered_df -- confirm that the slider is meant to affect the table only.

# 1. Rating distribution
st.header("推薦度分佈分析")

# Histogram
fig_histogram = px.histogram(
    df,
    x="推薦度",
    nbins=10,
    title="推薦度直方圖",
    color_discrete_sequence=['#7DAFF2'],
)
st.plotly_chart(fig_histogram)

# Box plot
fig_boxplot = px.box(
    df,
    y="推薦度",
    title="推薦度箱線圖",
    color_discrete_sequence=['#FFB6C1'],
)
st.plotly_chart(fig_boxplot)

# 2. Geographic distribution
st.header("地理分佈分析")

# Plot clinics at their district's coordinates
fig_geo_dist = px.scatter_mapbox(
    df,
    lat="緯度",
    lon="經度",
    hover_name="診所名稱",
    hover_data=["推薦度", "電話", "地址"],
    color="區域",
    size="推薦度",
    zoom=10,
    height=600,
    title="動物醫院地理分佈",
)
fig_geo_dist.update_layout(mapbox_style="open-street-map")
st.plotly_chart(fig_geo_dist)

# 3. Relationship between rating and location
st.header("推薦度與地理位置的關聯性")

# Mean rating per district
mean_rating_by_district = df.groupby("區域")["推薦度"].mean().reset_index()
fig_bar = px.bar(
    mean_rating_by_district,
    x="區域",
    y="推薦度",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
st.plotly_chart(fig_bar)

# Density map of ratings by location
fig_heatmap = px.density_mapbox(
    df,
    lat="緯度",
    lon="經度",
    z="推薦度",
    radius=10,
    mapbox_style="open-street-map",
    zoom=10,
    title="動物醫院推薦度熱力圖",
)
st.plotly_chart(fig_heatmap)