# Spaces: Build error
"""Streamlit app that scrapes restaurant pages and charts restaurants per district.

The page URLs are read from a Google Sheet exported as CSV (column "網址").
Pressing the button fetches every page, extracts the title, address, phone
number and description, derives a district from the address, and renders a
bar chart, a donut chart and two tables.
"""

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from bs4 import BeautifulSoup

# Seconds to wait for a single page; without a timeout one unresponsive host
# would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Placeholder stored when a field cannot be found on a page.
MISSING = 'N/A'

# Layout settings shared by both figures.
_COMMON_LAYOUT = dict(
    plot_bgcolor='rgba(240,240,240,0.8)',
    paper_bgcolor='white',
    font=dict(family="Arial", size=14),
    hoverlabel=dict(bgcolor="white", font_size=14),
    margin=dict(l=50, r=50, t=80, b=50),
)


def _centered_title(text):
    """Return the plotly title spec used by both figures for *text*."""
    return {
        'text': text,
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24, family="Arial", color="DarkSlateGray"),
    }


def _text_or_missing(soup, tag, css_class):
    """Return the stripped text of the first matching element, or MISSING.

    ``soup.find`` returns ``None`` when nothing matches, so the lookup is
    guarded instead of letting ``.text`` raise ``AttributeError``.
    """
    node = soup.find(tag, class_=css_class)
    return node.text.strip() if node is not None else MISSING


def _scrape_restaurant(url):
    """Fetch *url* and return a dict with Title, Address, Phone and Description.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # The phone number lives in the href of the "tel:" call-to-action link.
    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
    href = phone_tag.get('href') if phone_tag is not None else None
    phone = href.replace('tel:', '') if href else MISSING

    return {
        'Title': _text_or_missing(soup, 'h1', 'restaurant-details__heading--title'),
        'Address': _text_or_missing(soup, 'li', 'restaurant-details__heading--address'),
        'Phone': phone,
        'Description': _text_or_missing(soup, 'div', 'restaurant-details__description--text'),
    }


def _build_bar_chart(district_counts):
    """Return the bar chart of restaurant counts per district."""
    fig = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count',
    )
    fig.update_layout(
        title=_centered_title("各區餐廳數量分佈"),
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        **_COMMON_LAYOUT,
    )
    fig.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    return fig


def _build_pie_chart(district_counts):
    """Return the donut chart of district shares, pulling out the largest slice(s)."""
    counts = district_counts['Count']
    # Computed once instead of once per slice.
    top_count = counts.max()
    fig = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=counts,
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
        pull=[0.1 if count == top_count else 0 for count in counts],
    )])
    fig.update_layout(
        title=_centered_title("各區餐廳比例"),
        legend_title="區域",
        **_COMMON_LAYOUT,
    )
    return fig


# Streamlit application title
st.title("各區餐廳數量分佈分析")

# Read the page URLs from the Google Sheet (CSV export)
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# The sheet is assumed to keep the URLs in a column named "網址";
# empty cells are read as NaN and are dropped here.
urls = urls_df['網址'].dropna().tolist()

# Button that triggers scraping and chart generation
if st.button("點擊開始爬取資料並生成圖表"):
    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)
        # One dict per successfully scraped page
        df_list = []
        for i, url in enumerate(urls):
            try:
                df_list.append(_scrape_restaurant(url))
            except requests.RequestException as exc:
                # A single bad page must not abort the whole run.
                st.warning(f"無法取得 {url}:{exc}")
            progress.progress((i + 1) / len(urls))

    if not df_list:
        # An empty DataFrame would have no 'Address' column to work with.
        st.error("沒有成功爬取任何餐廳資料,無法生成圖表。")
        st.stop()

    st.success('資料爬取完成!')
    df = pd.DataFrame(df_list)

    # Extract the district from the address.
    # NOTE(review): `\w+` is greedy and also matches CJK characters, so an
    # address that spells out the city before the district yields the combined
    # string (city + district) - confirm this is the intended grouping.
    df['District'] = df['Address'].str.extract(r'(\w+區)', expand=False)

    # Count restaurants per district (rows without a district are excluded).
    # Column names are assigned explicitly because the names produced by
    # value_counts().reset_index() differ between pandas versions.
    district_counts = df['District'].value_counts().reset_index()
    district_counts.columns = ['District', 'Count']

    if district_counts.empty:
        st.warning("地址中找不到任何區域,無法生成圖表。")
    else:
        fig_bar = _build_bar_chart(district_counts)
        fig_pie = _build_pie_chart(district_counts)

        # Show the charts
        st.plotly_chart(fig_bar, use_container_width=True)
        st.plotly_chart(fig_pie, use_container_width=True)

        # Show the per-district summary table
        st.write("各區餐廳數量統計表")
        st.dataframe(district_counts)

    # Show the raw scraped data
    st.write("原始餐廳資料")
    st.dataframe(df)