KKingzor committed on
Commit
dd1428d
·
verified ·
1 Parent(s): 351b2a3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ import urllib.parse
6
+ from geopy.geocoders import Nominatim
7
+ import plotly.express as px
8
+
9
+ # Step 1: Fetch hospital URLs and title from the list page
10
def get_hospital_urls_and_title(list_url, timeout=10):
    """Fetch a listing page and return its title plus the linked page URLs.

    Args:
        list_url: Absolute URL of the listing page to download.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A ``(page_title, hospital_urls)`` tuple. ``page_title`` is the text
        of the first ``<h1 class="t-intro__title">`` element, or ``'N/A'``
        when the page has none. ``hospital_urls`` contains one absolute URL
        per ``<a class="m-link">`` anchor that carries an ``href``.
        If the request fails, the error is reported through ``st.error`` and
        ``('N/A', [])`` is returned.
    """
    try:
        response = requests.get(list_url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"Error fetching hospital list: {e}")
        return 'N/A', []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Title of the listing page itself.
    title_element = soup.find('h1', class_='t-intro__title')
    page_title = title_element.get_text(strip=True) if title_element else 'N/A'

    # ``href=True`` skips anchors without an href attribute, which would
    # otherwise raise KeyError and discard every URL already collected.
    hospital_links = soup.find_all('a', class_='m-link', href=True)
    hospital_urls = [
        urllib.parse.urljoin(list_url, link['href']) for link in hospital_links
    ]

    return page_title, hospital_urls
28
+
29
+ # Step 2: Fetch hospital details from each hospital's page
30
def _text_or_na(soup, tag, css_class):
    """Return the stripped text of the first ``tag`` with ``css_class``, or 'N/A'."""
    element = soup.find(tag, class_=css_class)
    return element.get_text(strip=True) if element else 'N/A'


def get_hospital_details(url, timeout=10):
    """Scrape the title, phone, address and rating from one detail page.

    Args:
        url: Absolute URL of the detail page.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A dict with the keys ``'Title'``, ``'Phone'``, ``'Address'`` and
        ``'Rating'``. A field whose element is missing from the page is
        ``'N/A'``. If the request fails, the error is reported through
        ``st.error`` and every field is ``'Error'``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"Error fetching details from {url}: {e}")
        return dict.fromkeys(('Title', 'Phone', 'Address', 'Rating'), 'Error')

    soup = BeautifulSoup(response.text, 'html.parser')

    # Each field is the first element matching the tag/class pair below.
    return {
        'Title': _text_or_na(soup, 'h1', 't-intro__title'),
        'Phone': _text_or_na(soup, 'a', 't-font-large'),
        'Address': _text_or_na(soup, 'a', 't-font-medium'),
        'Rating': _text_or_na(soup, 'span', 't-intro__recommand'),
    }
66
+
67
+ # Main function to fetch and display hospital data based on city selection and category
68
def fetch_and_display_hospital_data(city_code, category_number):
    """Build the listing URL for a city/category and collect hospital rows.

    Args:
        city_code: City code segment of the listing URL (e.g. ``"07"``).
        category_number: Category segment of the listing URL.

    Returns:
        A ``(page_title, df)`` tuple. ``df`` has the columns ``Title``,
        ``Phone``, ``Address`` and ``Rating`` and holds the details for the
        links found at positions 2 through 9 of the listing page (at most
        eight rows, indexed from 0).
    """
    base_url = 'https://www.tw-animal.com'
    list_url = f'{base_url}/list/pet/{city_code}/{category_number}.html'

    # Step 1: Get the list page title and hospital URLs
    page_title, hospital_urls = get_hospital_urls_and_title(list_url)

    # Step 2: Only the links at positions 2..9 are kept in the result, so
    # slice before fetching instead of downloading every page and discarding
    # most of them afterwards.
    # NOTE(review): presumably the first two links are not hospital entries
    # and eight results is a display limit -- confirm against the site.
    selected_urls = hospital_urls[2:10]
    hospital_data = [get_hospital_details(url) for url in selected_urls]

    # Explicit columns keep the schema stable even when no rows were found.
    df_filtered = pd.DataFrame(
        hospital_data, columns=['Title', 'Phone', 'Address', 'Rating']
    )

    return page_title, df_filtered
86
+
87
+ # Streamlit interface setup
88
def hospital_info_interface(city, category_number):
    """Validate the user's selection and fetch the matching hospital data.

    Args:
        city: City name; must be one of the keys of the code table below.
        category_number: Listing category, expected in the range 1..1024.

    Returns:
        The ``(page_title, df)`` tuple produced by
        ``fetch_and_display_hospital_data``. For an out-of-range category or
        an unknown city, the problem is reported through ``st.error`` and
        ``("Error", <empty DataFrame>)`` is returned.
    """
    city_codes = {
        "台北": "07",
        "新北": "08",
        "桃園": "09",
        "台中": "12",
        "台南": "17",
        "高雄": "18",
    }

    # Validate the category number is within the valid range
    if not (1 <= category_number <= 1024):
        st.error("Category number must be between 1 and 1024.")
        return "Error", pd.DataFrame()

    # Report an unknown city the same way as an invalid category instead of
    # letting a KeyError escape to the caller.
    city_code = city_codes.get(city)
    if city_code is None:
        st.error(f"Unsupported city: {city}")
        return "Error", pd.DataFrame()

    return fetch_and_display_hospital_data(city_code, str(category_number))
107
+
108
+ # Geocode addresses to latitude and longitude
109
def geocode_addresses(df):
    """Add ``Latitude`` and ``Longitude`` columns by geocoding ``df['Address']``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with an ``Address`` column of address strings.

    Returns:
        The same DataFrame with ``Latitude`` and ``Longitude`` columns. A row
        gets ``None`` for both when its address is a placeholder
        (``'N/A'`` / ``'Error'``), blank, not a string, not found by the
        geocoder, or when geocoding raised (reported through ``st.error``).
    """
    # Local import: the rate limiter ships with geopy, which this file
    # already depends on.
    from geopy.extra.rate_limiter import RateLimiter

    geolocator = Nominatim(user_agent="hospital_locator")
    # The public Nominatim service allows at most one request per second.
    # Exceptions are not swallowed so that they are still reported below.
    geocode = RateLimiter(
        geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False
    )

    # Values the scraping step uses when an address is missing or failed;
    # sending them to the geocoder is wasted work and may return bogus hits.
    placeholders = {'N/A', 'Error'}

    latitudes = []
    longitudes = []

    for address in df['Address']:
        location = None
        is_real_address = (
            isinstance(address, str)
            and address.strip()
            and address not in placeholders
        )
        if is_real_address:
            try:
                location = geocode(address)
            except Exception as e:
                st.error(f"Error geocoding address {address}: {e}")

        latitudes.append(location.latitude if location else None)
        longitudes.append(location.longitude if location else None)

    df['Latitude'] = latitudes
    df['Longitude'] = longitudes
    return df
131
+
132
+ # Streamlit app setup
133
def main():
    """Render the Streamlit page: inputs, result table and hospital map.

    The user picks a city and a category number. When the query button is
    pressed, the matching hospital rows are fetched and shown as a table,
    then geocoded and plotted on an OpenStreetMap-styled scatter map.
    """
    st.title("台灣寵物醫院資料查詢")

    city_list = ["台北", "新北", "桃園", "台中", "台南", "高雄"]
    city = st.selectbox("選擇縣市", city_list)
    category_number = st.number_input(
        "輸入分類編號 (1-1024)", min_value=1, max_value=1024, value=1
    )

    if not st.button("查詢"):
        return

    page_title, df = hospital_info_interface(city, category_number)

    # The original label contained mis-encoded characters; restored to
    # "頁面標題" (page title).
    st.subheader(f"頁面標題: {page_title}")
    st.dataframe(df)

    if df.empty:
        return

    df = geocode_addresses(df)

    # Rows without coordinates cannot be placed on the map.
    located = df.dropna(subset=["Latitude", "Longitude"])
    if located.empty:
        st.warning("無法取得任何地址的座標，因此不顯示地圖。")
        return

    fig = px.scatter_mapbox(
        located,
        lat="Latitude",
        lon="Longitude",
        hover_name="Title",
        hover_data=["Phone", "Address", "Rating"],
        zoom=10,
        height=600,
    )
    fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )
    st.plotly_chart(fig)
158
+
159
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()