forever-sheikh committed on
Commit
f97b33b
Β·
verified Β·
1 Parent(s): 7e007f9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +347 -0
app.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import googlemaps
3
+ import osmnx as ox
4
+ import geopandas as gpd
5
+ import pandas as pd
6
+ import requests
7
+ import zipfile
8
+ import os
9
+ import glob
10
+ import shutil
11
+ import time
12
+ from shapely.geometry import Point, Polygon
13
+ from shapely.ops import transform
14
+
15
# ==========================================
# AUTHENTICATION
# ==========================================
# os.environ.get returns None for a missing key and never raises, so the
# original try/except around it was dead code; read the key directly.
API_KEY = os.environ.get("GOOGLE_API_KEY")
22
+
23
# ==========================================
# 1. UNIVERSAL FILTER LISTS (FINAL COMPLETE VERSION)
# ==========================================
# Filters out Schools, Doctors, Parking Lots, Repairs, etc.
# A Places result is discarded when ANY of these substrings appears in its
# lower-cased, punctuation-stripped name (see the search loop in
# process_data); keep entries lower-case for that reason.
UNIVERSAL_BAD_TERMS = [
    # Health / Medical
    "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
    "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
    "ambulance", "transport", "emergency", "veterinary", "vision center",
    "spinal cord", "urgent care", "hellomed", "spine", "program",
    # Education
    "school", "university", "college", "academy", "campus", "library", "learning",
    "student", "alum", "education", "institute", "dorm", "residence",
    # Services / Misc
    "atm", "kiosk", "redbox", "coinme", "fuel", "gas", "repair", "service",
    "collision", "towing", "plumbing", "hvac", "electric", "tree", "lawn",
    "gutter", "cleaning", "storage", "warehouse", "distribution", "mural", "statue",
    "part", "accessories", "hair", "salon", "studio", "barber", "spa", "nail",
    "diamonds", "jewelers", "pllc", "llc", "parking", "drive", "cooling", "heating",
    "brandy", "bike shop", "grooming"
]
44
+
45
# Filters out departments inside Big Box stores (Fixes Area Splitting)
# Applied as a case-insensitive substring match during post-processing to
# drop rows like "Target Grocery" or "Meijer Deli" that would otherwise be
# counted as separate stores sharing one roof.
DEPARTMENT_TERMS = [
    "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
    "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
    "money", "bank", "cafe", "bistro", "snack", "food court", "customer service",
    "floral", "flowers", "store on", "tire", "battery", "auto", "lube",
    "credit union", "sephora", "sunglass", "finish line", "pro desk",
    "rental center", "svc drive", "inside", "at ",  # "at " catches "X at Y" store-in-store names
    "dog training"
]
55
+
56
# ==========================================
# 2. COMPREHENSIVE STORE LIST
# ==========================================
# One Places API keyword search is issued per entry, so the scan cost grows
# linearly with this list's length.
SEARCH_LIST = [
    # Big Box / Dept
    "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
    "Nordstrom", "Costco", "Sam's Club", "BJ's Wholesale Club",

    # Clothing / Discount
    "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "HomeSense",
    "Ross Dress for Less", "Burlington", "Old Navy", "DSW Designer Shoe Warehouse",

    # Home Imp / Hardware / Furniture
    "Home Depot", "Lowe's", "Ace Hardware", "Menards",
    "IKEA", "Bob's Discount Furniture", "Raymour & Flanigan", "Ashley Furniture",

    # Electronics / Office
    "Best Buy", "Office Depot", "OfficeMax", "Staples",

    # Grocery
    "Kroger", "Meijer", "Whole Foods", "Trader Joe's", "Aldi", "Lidl",
    "Safeway", "Albertsons", "ShopRite", "Stop & Shop", "Publix", "Wegmans",

    # Hobbies / Pets / Sporting
    "Dick's Sporting Goods", "Bass Pro Shops", "Cabela's", "REI",
    "Michaels", "Hobby Lobby", "Barnes & Noble",
    "PetSmart", "Petco"
]
84
+
85
# Fallback floor counts used when OSM provides no 'building:levels' tag;
# brands absent from this map default to 1 floor (see process_data).
BRAND_FLOORS = {
    "Macy's": 2, "JCPenney": 2, "Nordstrom": 2, "Sears": 2, "IKEA": 2,
    "Target": 1, "Walmart": 1, "Costco": 1, "Home Depot": 1, "Lowe's": 1,
    "Barnes & Noble": 1, "Dick's Sporting Goods": 1, "Kohl's": 1
}
90
+
91
# Typical roof area per brand in square metres; used whenever the Solar API
# returns nothing, or returns an implausible value (whole-mall roof or a
# sliver < 500 m^2).  Brands not listed fall back to 2500.0 m^2.
BRAND_AVG_AREA = {
    "IKEA": 28000, "Walmart": 15000, "Costco": 14000, "Sam's Club": 13000,
    "Meijer": 18000, "Target": 12000, "Home Depot": 10000, "Lowe's": 10000,
    "Kroger": 6000, "Safeway": 5000, "Whole Foods": 4000, "Macy's": 16000,
    "JCPenney": 10000, "Sears": 12000, "Kohl's": 8000, "Dick's Sporting Goods": 4500,
    "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
    "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
    "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
    "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000,
    "Hobby Lobby": 5000, "BJ's Wholesale Club": 10000, "REI": 4000
}
102
+
103
# ==========================================
# HELPER FUNCTIONS
# ==========================================
def load_geodata_to_polygon(file_obj):
    """Read an uploaded KML/KMZ file and return the union of its geometries.

    Parameters
    ----------
    file_obj : uploaded file object exposing a ``.name`` path (Gradio upload).

    Returns
    -------
    A shapely geometry covering all features in the file, or ``None`` when
    the file is neither ``.kml`` nor ``.kmz`` or cannot be parsed.
    """
    extract_path = "temp_extract"
    # Start from a clean scratch directory so a stale KML from a previous
    # upload is never picked up by the glob below.
    if os.path.exists(extract_path):
        shutil.rmtree(extract_path)
    os.makedirs(extract_path)

    target_kml = None
    try:
        # A KMZ is just a zip archive wrapping one or more KML documents.
        if file_obj.name.lower().endswith('.kmz'):
            with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
            kml_files = glob.glob(extract_path + "/**/*.kml", recursive=True)
            if kml_files:
                target_kml = kml_files[0]
        elif file_obj.name.lower().endswith('.kml'):
            target_kml = file_obj.name

        if target_kml:
            gdf = gpd.read_file(target_kml)

            # FORCE 2D FIX: KMLs often carry Z coordinates, which crash
            # some downstream shapely operations -- drop them.
            def force_2d(geometry):
                if geometry.has_z:
                    return transform(lambda x, y, z=None: (x, y), geometry)
                return geometry

            gdf.geometry = gdf.geometry.apply(force_2d)
            return gdf.union_all()
    except Exception:
        # Narrowed from a bare except: still best-effort (the caller treats
        # None as "failed to read"), but KeyboardInterrupt/SystemExit now
        # propagate instead of being swallowed.
        return None
    return None
138
+
139
def get_roof_area(lat, lng, api_key):
    """Return the whole-roof area (m^2) of the building nearest (lat, lng).

    Queries the Google Solar API ``buildingInsights:findClosest`` endpoint
    with ``requiredQuality=HIGH``.  Returns ``None`` on any failure (network
    error, API error payload, or missing field) so callers can fall back to
    brand averages.
    """
    base_url = "https://solar.googleapis.com/v1/buildingInsights:findClosest"
    params = {"location.latitude": lat, "location.longitude": lng, "requiredQuality": "HIGH", "key": api_key}
    try:
        # timeout added: without one a stalled connection would hang the
        # entire scan loop indefinitely.
        resp = requests.get(base_url, params=params, timeout=15)
        data = resp.json()
        if 'error' in data:
            return None
        return data.get('solarPotential', {}).get('wholeRoofStats', {}).get('areaMeters2', None)
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body; narrowed from a bare except.
        return None
149
+
150
def get_osm_physics(lat, lng):
    """Look up building height and floor count from OpenStreetMap.

    Fetches all building footprints within 60 m of (lat, lng), picks the one
    with the largest projected area (EPSG:3857), and parses its ``height``
    and ``building:levels`` tags.

    Returns
    -------
    (height, floors) : tuple of (float | None, int | None)
        Either element is ``None`` when the tag is absent or unparseable.
    """
    try:
        tags = {'building': True}
        gdf = ox.features.features_from_point((lat, lng), tags, dist=60)
        if not gdf.empty:
            # NOTE: EPSG:3857 areas are distorted away from the equator; it
            # is only used here to rank footprints, not to report area.
            gdf_proj = gdf.to_crs(epsg=3857)
            gdf_proj['area_m2'] = gdf_proj.geometry.area
            best = gdf_proj.sort_values(by='area_m2', ascending=False).iloc[0]

            floors = None
            if 'building:levels' in best and pd.notna(best['building:levels']):
                # OSM values like "3;4" occur -- take the first token.
                try:
                    floors = int(float(str(best['building:levels']).split(';')[0]))
                except (ValueError, TypeError):
                    pass

            height = None
            if 'height' in best and pd.notna(best['height']):
                # Strip a trailing "m" unit that some tags carry.
                try:
                    height = float(str(best['height']).replace('m', '').strip())
                except (ValueError, TypeError):
                    pass
            return height, floors
    except Exception:
        # OSM/network failures are non-fatal; narrowed from a bare except.
        pass
    return None, None
172
+
173
# ==========================================
# MAIN LOGIC
# ==========================================
def process_data(file_obj):
    """Gradio generator: scan the uploaded polygon for big-box stores.

    Yields ``(status_message, csv_path_or_None)`` tuples so the UI can
    stream progress; the final yield carries the exported CSV path.

    Pipeline: load polygon -> enforce 250M m^2 area cap -> Places keyword
    search per brand -> name/containment filters -> roof area (Solar API)
    and height/floors (OSM) with brand-average fallbacks -> dedupe and
    strip-mall splitting -> CSV export.
    """
    if not API_KEY:
        yield "❌ API Key not found! Set GOOGLE_API_KEY in Secrets.", None
        return

    if file_obj is None:
        yield "❌ Please upload a file.", None
        return

    yield "πŸ“‚ Loading Polygon...", None
    polygon = load_geodata_to_polygon(file_obj)

    if not polygon:
        yield "❌ Failed to read KML/KMZ file.", None
        return

    # --- CHECK AREA LIMIT HERE (250,000,000 sq m) ---
    # EPSG:6933 is an equal-area projection, so .area is in true m^2.
    gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
    gs_proj = gs.to_crs(epsg=6933)
    area_sq_meters = gs_proj.area.iloc[0]
    if area_sq_meters > 250_000_000:
        yield f"⚠️ AREA TOO LARGE: {area_sq_meters:,.0f} sq m. (Limit: 250M).", None
        return

    gmaps = googlemaps.Client(key=API_KEY)
    results = []
    seen_ids = set()
    total_brands = len(SEARCH_LIST)

    # NOTE(review): every search is centred on the polygon centroid with a
    # fixed 10 km radius, so stores inside a very elongated polygon but more
    # than 10 km from the centroid will be missed.
    search_center = (polygon.centroid.y, polygon.centroid.x)

    # 1. SEARCH LOOP
    for i, brand in enumerate(SEARCH_LIST):
        yield f"πŸ” Scanning Brand {i+1}/{total_brands}: {brand}...", None

        try:
            places = gmaps.places_nearby(
                location=search_center,
                radius=10000,
                keyword=brand
            )

            all_results = places.get('results', [])
            # The Places API paginates; a next_page_token needs ~2 s to
            # become valid before it can be redeemed.
            while 'next_page_token' in places:
                time.sleep(2)
                places = gmaps.places_nearby(
                    location=search_center,
                    radius=10000,
                    keyword=brand,
                    page_token=places['next_page_token']
                )
                all_results.extend(places.get('results', []))

            for p in all_results:
                pid = p.get('place_id')
                if pid in seen_ids:
                    continue

                name = p.get('name')
                name_clean = name.lower().replace("'", "").replace(".", "")
                brand_clean = brand.lower().replace("'", "").replace(".", "")

                # A. UNIVERSAL NAME CHECK -- drop results that matched the
                # keyword without the brand actually being in the name.
                # BUGFIX: the original special cases compared "t.j. maxx" /
                # "lowe's" against name_clean, which has dots and apostrophes
                # stripped (and brand_clean for "Lowe's" is "lowes", never
                # "lowe"), so both branches were dead code.  They now use the
                # same normalisation as name_clean.
                if brand_clean not in name_clean:
                    if brand_clean == "tjx" and "tj maxx" in name_clean:
                        pass
                    elif brand_clean == "lowes" and "lowe" in name_clean:
                        pass
                    else:
                        continue

                # B. UNIVERSAL BAD WORD FILTER (Strict)
                if any(term in name_clean for term in UNIVERSAL_BAD_TERMS):
                    continue

                lat = p['geometry']['location']['lat']
                lng = p['geometry']['location']['lng']

                # C. STRICT CONTAINMENT -- keep only stores strictly inside
                # the uploaded polygon (boundary points are excluded).
                if not polygon.contains(Point(lng, lat)):
                    continue
                seen_ids.add(pid)

                # FETCH DATA
                roof_area = get_roof_area(lat, lng, API_KEY)
                height, floors = get_osm_physics(lat, lng)

                # DATA FILLING -- fall back to per-brand averages when the
                # Solar API has no data or an implausible value.
                source_note = "SolarAPI"
                if roof_area is None:
                    roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
                    source_note = "Brand_Avg (Missing)"
                else:
                    # Universal Mall Logic: a huge roof usually means the API
                    # returned the whole mall instead of the store itself.
                    if roof_area > 30000 and brand not in ["IKEA", "Costco", "Meijer", "Sam's Club", "Walmart"]:
                        roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
                        source_note = "Brand_Avg (Mall detected)"
                    elif roof_area < 500 and brand not in ["Ace Hardware", "Trader Joe's"]:
                        roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
                        source_note = "Brand_Avg (Too Small detected)"

                if floors is None:
                    floors = BRAND_FLOORS.get(brand, 1)
                if height is None:
                    height = floors * 6.0  # assume ~6 m per retail floor

                results.append({
                    'Name': name,
                    'Brand': brand,
                    'Latitude': lat,
                    'Longitude': lng,
                    'Height_m': round(height, 2),
                    'Num_Floors': int(floors),
                    'Area_sqm': round(roof_area, 2),
                    'Data_Source': source_note
                })
        except Exception:
            # Narrowed from a bare "except: pass": one failing brand should
            # not abort the scan, but KeyboardInterrupt/SystemExit propagate.
            continue

    if not results:
        yield "❌ No stores found in this area.", None
        return

    # ==========================================
    # 2. UNIVERSAL POST-PROCESSING
    # ==========================================
    yield "🧹 Performing Universal Deduplication...", None

    df = pd.DataFrame(results)

    # A. Remove Departments (Target Grocery, Meijer Deli, Kroger Floral)
    df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]

    # B. Spatial Deduplication (Group by Brand + Location)
    # Rounding to 4 decimal places makes a grid cell of roughly 11 metres.
    df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)

    # Sort by name length: the shortest name is usually the store itself
    # ("Target"), longer ones are departments ("Target Grocery ...").
    df['Name_Len'] = df['Name'].str.len()
    df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])

    # Drop duplicates, keeping the shortest name
    df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
    df = df.drop(columns=['Loc_ID', 'Name_Len'])

    # C. Universal Strip Mall Splitter: rows sharing an identical roof area
    # are assumed to be tenants of one building; split the footprint.
    df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')

    df['Final_Area_sqm'] = df.apply(
        lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
        axis=1
    )

    df['Data_Source'] = df.apply(
        lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
        axis=1
    )

    # Clean Export
    final_cols = ['Name', 'Brand', 'Latitude', 'Longitude', 'Height_m', 'Num_Floors', 'Final_Area_sqm', 'Data_Source']
    df_final = df[final_cols].rename(columns={'Final_Area_sqm': 'Area_sqm'})

    output_path = "Universal_Building_Inventory.csv"
    df_final.to_csv(output_path, index=False)

    yield f"βœ… Success! Found {len(df_final)} unique commercial assets.", output_path
332
+
333
# ==========================================
# GRADIO INTERFACE
# ==========================================
# process_data is a generator, so the Status Log textbox streams progress
# messages; the second output receives the CSV path on the final yield.
iface = gr.Interface(
    fn=process_data,
    inputs=gr.File(label="Upload Polygon (KML/KMZ) - - Limit 250,000,000 Sq meters area"),
    outputs=[
        gr.Textbox(label="Status Log"),
        gr.File(label="Download CSV")
    ],
    title="🌎 Universal Commercial Asset Scanner",
    description="Upload any KML/KMZ. Scans for 50+ Big Box Brands, strip malls. Using Places API, Solar API and OpenStreetMaps API"
)

iface.launch()