Update app.py
Browse files
app.py
CHANGED
|
@@ -7,12 +7,15 @@ import base64
|
|
| 7 |
import time
|
| 8 |
import overpy
|
| 9 |
from geopy.geocoders import Nominatim
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# -------------------------------------------------------------------
|
| 12 |
# Setup
|
| 13 |
# -------------------------------------------------------------------
|
| 14 |
geolocator = Nominatim(user_agent="hf-saas-dashboard")
|
| 15 |
api = overpy.Overpass()
|
|
|
|
| 16 |
|
| 17 |
# -------------------------------------------------------------------
|
| 18 |
# Helpers
|
|
@@ -64,61 +67,93 @@ def fetch_places(amenities, bbox):
|
|
| 64 |
result = api.query(query)
|
| 65 |
rows = []
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
tags = node.tags
|
| 70 |
-
rows.append({
|
| 71 |
"name": tags.get("name", ""),
|
| 72 |
"amenity": tags.get("amenity", ""),
|
| 73 |
-
"lat":
|
| 74 |
-
"lon":
|
| 75 |
-
"address": ", ".join(v for k, v in tags.items() if k.startswith("addr:")),
|
| 76 |
"phone": tags.get("phone", ""),
|
| 77 |
"website": tags.get("website", ""),
|
| 78 |
-
"
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
# Ways
|
| 83 |
for way in result.ways:
|
| 84 |
-
tags
|
| 85 |
-
center_lat = getattr(way, "center_lat", None)
|
| 86 |
-
center_lon = getattr(way, "center_lon", None)
|
| 87 |
-
rows.append({
|
| 88 |
-
"name": tags.get("name", ""),
|
| 89 |
-
"amenity": tags.get("amenity", ""),
|
| 90 |
-
"lat": center_lat,
|
| 91 |
-
"lon": center_lon,
|
| 92 |
-
"address": ", ".join(v for k, v in tags.items() if k.startswith("addr:")),
|
| 93 |
-
"phone": tags.get("phone", ""),
|
| 94 |
-
"website": tags.get("website", ""),
|
| 95 |
-
"opening_hours": tags.get("opening_hours", ""),
|
| 96 |
-
"osm_id": f"way/{way.id}",
|
| 97 |
-
})
|
| 98 |
|
| 99 |
-
# Relations
|
| 100 |
for rel in result.relations:
|
| 101 |
-
tags
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
return df
|
| 120 |
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
def df_to_csv_bytes(df):
    """Serialize *df* to CSV text without the index column, encoded as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")
|
| 124 |
|
|
@@ -126,6 +161,7 @@ def df_to_csv_bytes(df):
|
|
| 126 |
def make_map(df, center_bbox=None):
|
| 127 |
if df.empty:
|
| 128 |
return folium.Map(location=[20, 0], zoom_start=2)._repr_html_()
|
|
|
|
| 129 |
if center_bbox:
|
| 130 |
south, west, north, east = center_bbox
|
| 131 |
center_lat = (south + north) / 2
|
|
@@ -133,19 +169,31 @@ def make_map(df, center_bbox=None):
|
|
| 133 |
else:
|
| 134 |
center_lat = df["lat"].mean()
|
| 135 |
center_lon = df["lon"].mean()
|
|
|
|
| 136 |
m = folium.Map(location=[center_lat, center_lon], zoom_start=8)
|
| 137 |
cluster = MarkerCluster().add_to(m)
|
|
|
|
| 138 |
for _, row in df.iterrows():
|
| 139 |
-
popup_html = f"
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
return m._repr_html_()
|
| 142 |
|
|
|
|
| 143 |
# -------------------------------------------------------------------
|
| 144 |
# Gradio Callbacks
|
| 145 |
# -------------------------------------------------------------------
|
| 146 |
COUNTRY_NAMES, COUNTRY_CODES = list_countries()
|
| 147 |
|
| 148 |
-
|
| 149 |
def update_states(selected_country_name):
|
| 150 |
try:
|
| 151 |
idx = COUNTRY_NAMES.index(selected_country_name)
|
|
@@ -172,40 +220,46 @@ def run_search(country_name, state_name, categories):
|
|
| 172 |
return "Please select at least one category.", None, None, None
|
| 173 |
|
| 174 |
df = fetch_places(categories, bbox)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
map_html = make_map(df, center_bbox=bbox)
|
| 176 |
csv_bytes = df_to_csv_bytes(df)
|
| 177 |
|
| 178 |
elapsed = time.time() - start
|
| 179 |
-
msg = f"Found {len(df)} places
|
| 180 |
|
| 181 |
-
# Download link
|
| 182 |
csv_b64 = base64.b64encode(csv_bytes).decode("utf-8")
|
| 183 |
csv_href = f'<a href="data:text/csv;base64,{csv_b64}" download="results.csv">๐ฅ Download CSV</a>'
|
| 184 |
|
| 185 |
return msg, df, map_html, csv_href
|
| 186 |
|
|
|
|
| 187 |
# -------------------------------------------------------------------
|
| 188 |
# Build Gradio UI
|
| 189 |
# -------------------------------------------------------------------
|
| 190 |
place_options = ["cafe", "motel", "hotel", "restaurant", "bar", "pub", "bakery", "fast_food", "guest_house", "hostel"]
|
| 191 |
|
| 192 |
with gr.Blocks() as demo:
|
| 193 |
-
gr.Markdown("# ๐ Client
|
| 194 |
|
| 195 |
with gr.Row():
|
| 196 |
with gr.Column(scale=1):
|
| 197 |
country = gr.Dropdown(choices=COUNTRY_NAMES, value="United States", label="Country")
|
| 198 |
-
|
| 199 |
-
# Default US states
|
| 200 |
us_names, _ = list_subdivisions("US")
|
| 201 |
state = gr.Dropdown(choices=us_names, value=(us_names[0] if us_names else None), label="State / Subdivision")
|
| 202 |
|
| 203 |
update_btn = gr.Button("Refresh states")
|
| 204 |
-
categories = gr.CheckboxGroup(place_options, label="Categories
|
| 205 |
-
search_btn = gr.Button("Search
|
| 206 |
|
| 207 |
info = gr.Textbox(label="Status", interactive=False)
|
| 208 |
-
download = gr.HTML(label="Download
|
| 209 |
|
| 210 |
with gr.Column(scale=2):
|
| 211 |
map_html_out = gr.HTML(label="Map")
|
|
|
|
| 7 |
import time
|
| 8 |
import overpy
|
| 9 |
from geopy.geocoders import Nominatim
|
| 10 |
+
import requests
|
| 11 |
+
from bs4 import BeautifulSoup
|
| 12 |
|
| 13 |
# -------------------------------------------------------------------
|
| 14 |
# Setup
|
| 15 |
# -------------------------------------------------------------------
|
| 16 |
geolocator = Nominatim(user_agent="hf-saas-dashboard")
|
| 17 |
api = overpy.Overpass()
|
| 18 |
+
GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY" # put your key here
|
| 19 |
|
| 20 |
# -------------------------------------------------------------------
|
| 21 |
# Helpers
|
|
|
|
| 67 |
result = api.query(query)
|
| 68 |
rows = []
|
| 69 |
|
| 70 |
+
def parse_tags(tags, lat, lon, osm_id):
|
| 71 |
+
return {
|
|
|
|
|
|
|
| 72 |
"name": tags.get("name", ""),
|
| 73 |
"amenity": tags.get("amenity", ""),
|
| 74 |
+
"lat": lat,
|
| 75 |
+
"lon": lon,
|
|
|
|
| 76 |
"phone": tags.get("phone", ""),
|
| 77 |
"website": tags.get("website", ""),
|
| 78 |
+
"osm_id": osm_id,
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
for node in result.nodes:
|
| 82 |
+
rows.append(parse_tags(node.tags, node.lat, node.lon, f"node/{node.id}"))
|
| 83 |
|
|
|
|
| 84 |
for way in result.ways:
|
| 85 |
+
rows.append(parse_tags(way.tags, getattr(way, "center_lat", None), getattr(way, "center_lon", None), f"way/{way.id}"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
|
|
|
| 87 |
for rel in result.relations:
|
| 88 |
+
rows.append(parse_tags(rel.tags, getattr(rel, "center_lat", None), getattr(rel, "center_lon", None), f"relation/{rel.id}"))
|
| 89 |
+
|
| 90 |
+
return pd.DataFrame(rows).dropna(subset=["lat", "lon"])
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# -------------------------------
|
| 94 |
+
# Google API Enrichment
|
| 95 |
+
# -------------------------------
|
| 96 |
+
def enrich_with_google_api(df):
    """Enrich OSM results with Google Places API (phone + website).

    For every row that is missing a phone or a website, run a Places text
    search for ``"<name> near <lat>,<lon>"``, take the first hit, and fetch
    its ``formatted_phone_number`` and ``website`` via the details endpoint.
    Values Google does not return leave the existing cell content in place.

    Rows are skipped (left unchanged) when:
      * both ``phone`` and ``website`` are already filled in,
      * ``name`` is empty -- a query of just coordinates would match an
        arbitrary nearby business and attach the wrong contact data,
      * the HTTP request fails, times out, or returns a non-JSON body.

    Args:
        df: DataFrame with ``name``, ``lat``, ``lon``, ``phone`` and
            ``website`` columns. It is modified in place.

    Returns:
        The same ``df`` object, after enrichment.
    """
    search_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    details_url = "https://maps.googleapis.com/maps/api/place/details/json"
    timeout_s = 10  # never let one slow lookup hang the whole search

    for i, row in df.iterrows():
        if row["phone"] and row["website"]:
            continue  # already have data
        if not row["name"]:
            continue  # nothing meaningful to search for

        try:
            query = f"{row['name']} near {row['lat']},{row['lon']}"
            params = {"query": query, "key": GOOGLE_API_KEY}
            r = requests.get(search_url, params=params, timeout=timeout_s)
            results = r.json().get("results")
            if not results:
                continue
            place_id = results[0]["place_id"]

            # Fetch details
            d_params = {
                "place_id": place_id,
                "fields": "formatted_phone_number,website",
                "key": GOOGLE_API_KEY,
            }
            d = requests.get(details_url, params=d_params, timeout=timeout_s).json()
        except (requests.RequestException, ValueError, KeyError):
            # Enrichment is best-effort: a failed lookup must not abort the
            # remaining rows or the search that called us.
            continue

        details = d.get("result", {})
        df.at[i, "phone"] = details.get("formatted_phone_number", row["phone"])
        df.at[i, "website"] = details.get("website", row["website"])
    return df
|
| 120 |
|
| 121 |
|
| 122 |
+
# -------------------------------
|
| 123 |
+
# Google Scraper Fallback (⚠️ not ToS-compliant)
|
| 124 |
+
# -------------------------------
|
| 125 |
+
def scrape_google_maps(name, lat, lon):
    """Scrape Google Maps web UI for phone number (last resort, experimental).

    Builds a Maps search URL for ``name`` centred on ``lat``/``lon``, fetches
    the page, and returns the stripped text of the first ``<span>`` that
    contains a ``+`` and at least one digit.

    Args:
        name: Place name to search for. An empty/None name returns ``None``
            without any request, since the result would be unrelated.
        lat: Latitude used to centre the search.
        lon: Longitude used to centre the search.

    Returns:
        The candidate phone string, or ``None`` when the name is empty, the
        request fails or times out, the response status is not 200, or no
        matching span is found.
    """
    from urllib.parse import quote

    if not name:
        return None

    # Percent-encode the name: characters such as "/", "?" or "#" would
    # otherwise change the path or start a query/fragment.
    encoded_name = quote(str(name), safe="")
    search_url = f"https://www.google.com/maps/search/{encoded_name}/@{lat},{lon},15z"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(search_url, headers=headers, timeout=10)
    except requests.RequestException:
        return None  # best-effort fallback: network trouble means "no phone"

    if r.status_code != 200:
        return None

    soup = BeautifulSoup(r.text, "html.parser")
    # WARNING: this heuristic is fragile and may break anytime
    for span in soup.find_all("span"):
        text = span.text
        if "+" in text and any(c.isdigit() for c in text):
            return text.strip()
    return None
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def enrich_with_scraper(df):
    """Fill in missing phone numbers using the Google Maps scraper fallback.

    Rows whose ``phone`` is already truthy are left untouched. For the rest,
    ``scrape_google_maps`` is called with the row's name and coordinates and a
    truthy result is written back into ``df`` in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    for idx, record in df.iterrows():
        if not record["phone"]:
            found = scrape_google_maps(record["name"], record["lat"], record["lon"])
            if found:
                df.at[idx, "phone"] = found
    return df
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# -------------------------------
|
| 155 |
+
# Visualization
|
| 156 |
+
# -------------------------------
|
| 157 |
def df_to_csv_bytes(df):
    """Serialize *df* to CSV text without the index column, encoded as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")
|
| 159 |
|
|
|
|
| 161 |
def make_map(df, center_bbox=None):
|
| 162 |
if df.empty:
|
| 163 |
return folium.Map(location=[20, 0], zoom_start=2)._repr_html_()
|
| 164 |
+
|
| 165 |
if center_bbox:
|
| 166 |
south, west, north, east = center_bbox
|
| 167 |
center_lat = (south + north) / 2
|
|
|
|
| 169 |
else:
|
| 170 |
center_lat = df["lat"].mean()
|
| 171 |
center_lon = df["lon"].mean()
|
| 172 |
+
|
| 173 |
m = folium.Map(location=[center_lat, center_lon], zoom_start=8)
|
| 174 |
cluster = MarkerCluster().add_to(m)
|
| 175 |
+
|
| 176 |
for _, row in df.iterrows():
|
| 177 |
+
popup_html = f"""
|
| 178 |
+
<b>{row['name'] or 'Unnamed Place'}</b><br>
|
| 179 |
+
๐ {row['phone'] if row['phone'] else 'N/A'}<br>
|
| 180 |
+
๐ <a href="{row['website']}" target="_blank">{row['website'] or 'N/A'}</a><br>
|
| 181 |
+
๐ด {row['amenity']}
|
| 182 |
+
"""
|
| 183 |
+
folium.Marker(
|
| 184 |
+
[row["lat"], row["lon"]],
|
| 185 |
+
popup=folium.Popup(popup_html, max_width=300),
|
| 186 |
+
tooltip=row["name"] if row["name"] else row["amenity"]
|
| 187 |
+
).add_to(cluster)
|
| 188 |
+
|
| 189 |
return m._repr_html_()
|
| 190 |
|
| 191 |
+
|
| 192 |
# -------------------------------------------------------------------
|
| 193 |
# Gradio Callbacks
|
| 194 |
# -------------------------------------------------------------------
|
| 195 |
COUNTRY_NAMES, COUNTRY_CODES = list_countries()
|
| 196 |
|
|
|
|
| 197 |
def update_states(selected_country_name):
|
| 198 |
try:
|
| 199 |
idx = COUNTRY_NAMES.index(selected_country_name)
|
|
|
|
| 220 |
return "Please select at least one category.", None, None, None
|
| 221 |
|
| 222 |
df = fetch_places(categories, bbox)
|
| 223 |
+
|
| 224 |
+
# Try Google API enrichment
|
| 225 |
+
if GOOGLE_API_KEY and GOOGLE_API_KEY != "YOUR_GOOGLE_API_KEY":
|
| 226 |
+
df = enrich_with_google_api(df)
|
| 227 |
+
|
| 228 |
+
# Fallback scraper if still missing phones
|
| 229 |
+
df = enrich_with_scraper(df)
|
| 230 |
+
|
| 231 |
map_html = make_map(df, center_bbox=bbox)
|
| 232 |
csv_bytes = df_to_csv_bytes(df)
|
| 233 |
|
| 234 |
elapsed = time.time() - start
|
| 235 |
+
msg = f"Found {len(df)} places in {state_name}, {country_name}. Took {elapsed:.1f}s."
|
| 236 |
|
|
|
|
| 237 |
csv_b64 = base64.b64encode(csv_bytes).decode("utf-8")
|
| 238 |
csv_href = f'<a href="data:text/csv;base64,{csv_b64}" download="results.csv">๐ฅ Download CSV</a>'
|
| 239 |
|
| 240 |
return msg, df, map_html, csv_href
|
| 241 |
|
| 242 |
+
|
| 243 |
# -------------------------------------------------------------------
|
| 244 |
# Build Gradio UI
|
| 245 |
# -------------------------------------------------------------------
|
| 246 |
place_options = ["cafe", "motel", "hotel", "restaurant", "bar", "pub", "bakery", "fast_food", "guest_house", "hostel"]
|
| 247 |
|
| 248 |
with gr.Blocks() as demo:
|
| 249 |
+
gr.Markdown("# ๐ Hybrid Client Finder (OSM + Google API + Scraper Fallback)")
|
| 250 |
|
| 251 |
with gr.Row():
|
| 252 |
with gr.Column(scale=1):
|
| 253 |
country = gr.Dropdown(choices=COUNTRY_NAMES, value="United States", label="Country")
|
|
|
|
|
|
|
| 254 |
us_names, _ = list_subdivisions("US")
|
| 255 |
state = gr.Dropdown(choices=us_names, value=(us_names[0] if us_names else None), label="State / Subdivision")
|
| 256 |
|
| 257 |
update_btn = gr.Button("Refresh states")
|
| 258 |
+
categories = gr.CheckboxGroup(place_options, label="Categories", value=["cafe", "restaurant"])
|
| 259 |
+
search_btn = gr.Button("Search")
|
| 260 |
|
| 261 |
info = gr.Textbox(label="Status", interactive=False)
|
| 262 |
+
download = gr.HTML(label="Download CSV")
|
| 263 |
|
| 264 |
with gr.Column(scale=2):
|
| 265 |
map_html_out = gr.HTML(label="Map")
|