Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import folium | |
| from folium.plugins import MarkerCluster, HeatMap | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from geopy.geocoders import Nominatim | |
| import re | |
| import streamlit as st | |
# --- Page header ------------------------------------------------------------
st.title("米其林餐廳指南爬蟲")
st.write("Extract restaurant data, visualize with charts, and display locations on maps.")

# --- Input: restaurant page URLs are read from a Google Sheet CSV export -----
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
sheet_csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
df1 = pd.read_csv(sheet_csv_url)

# The "網址" column holds one URL per row; keep it as a plain Python list.
urls = df1["網址"].tolist()

# --- Output: empty table that fetch_data() fills with one row per restaurant -
result_columns = ["Store Name", "Address", "Phone", "Latitude", "Longitude", "Region"]
df = pd.DataFrame(columns=result_columns)

# --- Geocoder used to turn addresses into latitude/longitude -----------------
geolocator = Nominatim(user_agent="my_app")
# Region pattern: prefer the text up to and including the first "區"; when the
# address has no "區", fall back to the text up to the first "縣" or "市".
# (The alternation is spelled out per branch because "|" binds more loosely
# than concatenation: a pattern like "(.*?)區|縣|市" matches a lone "縣"/"市".)
_REGION_PATTERN = re.compile(r".*?區|.*?[縣市]")


def extract_region(address):
    """Extract the region (區域) prefix from an address string.

    Args:
        address: Address text; may be empty or ``None``.

    Returns:
        The leading part of the address up to and including the first "區",
        or, if there is no "區", up to and including the first "縣" or "市".
        Returns ``"Unknown"`` when the address is empty/``None`` or contains
        none of those characters.
    """
    if not address:
        return "Unknown"
    match = _REGION_PATTERN.search(address)
    return match.group(0) if match else "Unknown"
# Upper bound, in seconds, for a single page download so that one stalled
# request cannot hang the whole crawl.
_REQUEST_TIMEOUT_SECONDS = 30


def _first_text(soup, tag, css_class):
    """Return the stripped text of the first ``tag`` with ``css_class``, or None."""
    node = soup.find(tag, class_=css_class)
    return node.text.strip() if node is not None else None


def _parse_restaurant(soup):
    """Build one result row (a dict keyed by column name) from a parsed page."""
    store_name = _first_text(soup, "h2", "restaurant-details__heading--title")
    address = _first_text(soup, "li", "restaurant-details__heading--address")
    region = extract_region(address) if address is not None else "Unknown"

    # The phone number is taken from the "tel:" link, when the page has one.
    phone_link = soup.find("a", {"data-event": "CTA_tel"})
    href = phone_link.get("href") if phone_link is not None else None
    phone = href.replace("tel:", "") if href is not None else None

    # Geocoding is best-effort: a missing address or any geocoder failure
    # simply leaves the coordinates empty.
    latitude = longitude = None
    if address:
        try:
            location = geolocator.geocode(address)
        except Exception:  # deliberately broad, but no longer a bare except
            location = None
        if location:
            latitude, longitude = location.latitude, location.longitude

    return {
        "Store Name": store_name,
        "Address": address,
        "Phone": phone,
        "Latitude": latitude,
        "Longitude": longitude,
        "Region": region,
    }


def fetch_data():
    """Scrape every page in ``urls`` and append the results to the global ``df``.

    For each URL the page is downloaded and parsed for the store name,
    address and phone number; the address is passed to ``extract_region`` and
    to ``geolocator.geocode``. A field that cannot be found is stored as
    ``None`` (``"Unknown"`` for the region). A Streamlit progress bar is
    advanced after every URL.

    A URL whose download fails is reported with ``st.warning`` and skipped
    instead of aborting the whole run.
    """
    global df

    progress_bar = st.progress(0)
    total_urls = len(urls)
    rows = []

    for idx, url in enumerate(urls):
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            st.warning(f"Skipped {url}: {exc}")
        else:
            soup = BeautifulSoup(response.content, "html.parser")
            rows.append(_parse_restaurant(soup))
        progress_bar.progress((idx + 1) / total_urls)

    if not rows:
        return

    # Concatenate once at the end rather than once per row.
    scraped = pd.DataFrame(rows)
    df = scraped if df.empty else pd.concat([df, scraped], ignore_index=True)

    # Keep the coordinate columns numeric so missing values become NaN.
    for column in ("Latitude", "Longitude"):
        df[column] = pd.to_numeric(df[column], errors="coerce")
# Scrape on demand, then render the table, download link, charts and maps.
# NOTE(review): indentation was lost in the copy under review. Everything below
# is assumed to belong to this `if` body, because the charts and maps have no
# data until fetch_data() has run -- confirm against the original file.
if st.button("爬取餐廳資料"):
    fetch_data()

    # Save the results to CSV (utf-8-sig, i.e. UTF-8 with a byte-order mark),
    # including latitude and longitude.
    csv_file = "restaurants_data.csv"
    df.to_csv(csv_file, encoding="utf-8-sig", index=False)

    st.subheader("Restaurant Data")
    st.dataframe(df)

    # Read the file back through a context manager so the handle is closed.
    with open(csv_file, "rb") as csv_handle:
        csv_bytes = csv_handle.read()
    st.download_button(
        label="Download restaurant data as CSV",
        data=csv_bytes,
        file_name=csv_file,
        mime="text/csv",
    )

    # Number of restaurants per region, shared by the charts below.
    region_group = df.groupby("Region").size().reset_index(name="Count")

    # Pie chart of the regional distribution.
    pie_chart = go.Figure(go.Pie(
        labels=region_group["Region"],
        values=region_group["Count"],
        textinfo="label+percent",
        hoverinfo="label+value",
        textfont=dict(size=18),
        marker=dict(colors=px.colors.qualitative.Set3, line=dict(color='#000000', width=2)),
    ))
    pie_chart.update_layout(
        title="Restaurant Distribution by Region",
        title_x=0.5,
        title_font=dict(size=24, family="Arial"),
        height=600,
        margin=dict(t=50, b=50, l=50, r=50),
    )
    st.subheader("Restaurant Distribution by Region (Enlarged Pie Chart)")
    st.plotly_chart(pie_chart)

    # Bar chart of the same counts.
    bar_chart = go.Figure(go.Bar(
        x=region_group["Region"],
        y=region_group["Count"],
        text=region_group["Count"],
        textposition='auto',
        marker=dict(color=px.colors.qualitative.Set2),
    ))
    bar_chart.update_layout(
        title="Restaurant Count by Region",
        title_x=0.5,
        title_font=dict(size=24, family="Arial"),
        height=400,
        margin=dict(t=50, b=50, l=50, r=50),
        xaxis_title="Region",
        yaxis_title="Number of Restaurants",
        xaxis=dict(tickangle=-45),
    )
    st.subheader("Restaurant Count by Region (Bar Chart)")
    st.plotly_chart(bar_chart)

    # Only rows with both coordinates can be drawn on a map. Computing them
    # once also gives a map centre that is never NaN.
    located = df.dropna(subset=["Latitude", "Longitude"])
    has_locations = not located.empty
    if has_locations:
        map_center = [
            located["Latitude"].astype(float).mean(),
            located["Longitude"].astype(float).mean(),
        ]

    # Clustered marker map (Folium).
    st.subheader("Restaurant Locations Map")
    if has_locations:
        m = folium.Map(location=map_center, zoom_start=10)
        marker_cluster = MarkerCluster().add_to(m)
        for _, row in located.iterrows():
            folium.Marker(
                location=[row["Latitude"], row["Longitude"]],
                popup=f"{row['Store Name']} ({row['Phone']})",
                tooltip=row["Address"],
            ).add_to(marker_cluster)
        st.components.v1.html(m._repr_html_(), height=600)
    else:
        st.info("No restaurant could be geocoded, so the marker map is not shown.")

    # Heatmap of the same coordinates (Folium).
    st.header("餐廳分布熱力圖")
    if has_locations:
        heat_data = located[["Latitude", "Longitude"]].astype(float).values.tolist()
        heatmap = folium.Map(location=map_center, zoom_start=10)
        HeatMap(heat_data).add_to(heatmap)
        st.components.v1.html(heatmap._repr_html_(), height=600)
    else:
        st.info("No restaurant could be geocoded, so the heatmap is not shown.")

    # Regional restaurant count analysis (Plotly Express).
    st.header("各區域餐廳數量分析")
    fig_bar = px.bar(
        region_group,
        x='Region',
        y='Count',
        title="各區域餐廳數量比較",
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
    )
    st.plotly_chart(fig_bar)

    # Scatter map of individual restaurant locations.
    fig_scatter = px.scatter_mapbox(
        df,
        lat="Latitude",
        lon="Longitude",
        hover_name="Store Name",
        hover_data=["Address", "Phone"],
        zoom=10,
        height=600,
        title="餐廳位置分布圖",
    )
    fig_scatter.update_layout(mapbox_style="open-street-map")
    st.plotly_chart(fig_scatter)
| # |