"""Streamlit app: scrape restaurant pages and chart how many fall in each district.

The list of restaurant page URLs is read from a Google Sheet (CSV export).
When the user presses the button, every page is fetched and parsed, the
district is extracted from each address, and a bar chart, a pie chart and
the underlying tables are rendered.
"""

import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Seconds to wait for a single restaurant page before giving up on it.
REQUEST_TIMEOUT = 10

# Placeholder stored when a field cannot be found on a page.
MISSING = 'N/A'


def _text_or_missing(node) -> str:
    """Return the stripped text of a parsed node, or ``MISSING`` if it is None."""
    return node.text.strip() if node is not None else MISSING


def _scrape_restaurant(url: str) -> dict:
    """Fetch one restaurant page and return its title, address, phone and description.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    title = _text_or_missing(
        soup.find('h1', class_='restaurant-details__heading--title'))
    address = _text_or_missing(
        soup.find('li', class_='restaurant-details__heading--address'))
    description = _text_or_missing(
        soup.find('div', class_='restaurant-details__description--text'))

    # The phone number lives in the href of the "tel:" call-to-action link.
    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
    href = phone_tag.get('href') if phone_tag is not None else None
    phone = href.replace('tel:', '') if href else MISSING

    return {'Title': title, 'Address': address, 'Phone': phone,
            'Description': description}


def _build_bar_chart(district_counts: pd.DataFrame) -> go.Figure:
    """Build the styled bar chart of restaurant count per district."""
    fig_bar = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count',
    )
    fig_bar.update_layout(
        title={
            'text': "各區餐廳數量分佈",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )
    fig_bar.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    return fig_bar


def _build_pie_chart(district_counts: pd.DataFrame) -> go.Figure:
    """Build the styled donut chart of each district's share of restaurants."""
    # Compute the maximum once; every slice tied for the largest count is
    # pulled out of the pie.
    top_count = district_counts['Count'].max()
    fig_pie = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=district_counts['Count'],
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
        pull=[0.1 if count == top_count else 0
              for count in district_counts['Count']],
    )])
    fig_pie.update_layout(
        title={
            'text': "各區餐廳比例",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        legend_title="區域",
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )
    return fig_pie


# Streamlit application title.
st.title("各區餐廳數量分佈分析")

# Read the restaurant URLs from the Google Sheet's CSV export.
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# The sheet is expected to hold the URLs in a column named "網址"; blank
# cells are dropped so they are never passed to requests.
urls = urls_df['網址'].dropna().tolist()

# Everything below runs only after the user presses the button.
if st.button("點擊開始爬取資料並生成圖表"):
    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)

        # One dict per successfully scraped restaurant.
        df_list = []

        for i, url in enumerate(urls):
            try:
                df_list.append(_scrape_restaurant(url))
            except requests.RequestException as exc:
                # A single unreachable page should not abort the whole run.
                st.warning(f"無法取得 {url}:{exc}")

            # Advance the progress bar whether or not this URL succeeded.
            progress.progress((i + 1) / len(urls))

    if not df_list:
        # Without any rows there is no 'Address' column to analyse.
        st.error("沒有成功爬取到任何餐廳資料。")
    else:
        st.success('資料爬取完成!')

        df = pd.DataFrame(df_list)

        # Extract the district from the address.
        # NOTE(review): `\w+` is greedy, so any word characters directly
        # preceding "區" (e.g. a city name) are captured as well - confirm
        # that this is the intended district label.
        df['District'] = df['Address'].str.extract(r'(\w+區)', expand=False)

        # Count restaurants per district (addresses with no match are NaN
        # and are left out by value_counts).
        district_counts = df['District'].value_counts().reset_index()
        district_counts.columns = ['District', 'Count']

        if district_counts.empty:
            st.warning("無法從地址中辨識任何區域。")
        else:
            st.plotly_chart(_build_bar_chart(district_counts),
                            use_container_width=True)
            st.plotly_chart(_build_pie_chart(district_counts),
                            use_container_width=True)

            # Per-district summary table.
            st.write("各區餐廳數量統計表")
            st.dataframe(district_counts)

        # Raw scraped data.
        st.write("原始餐廳資料")
        st.dataframe(df)