forever-sheikh committed on
Commit
7e007f9
·
verified ·
1 Parent(s): 656dfee

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -346
app.py DELETED
@@ -1,346 +0,0 @@
1
- import gradio as gr
2
- import googlemaps
3
- import osmnx as ox
4
- import geopandas as gpd
5
- import pandas as pd
6
- import requests
7
- import zipfile
8
- import os
9
- import glob
10
- import shutil
11
- import time
12
- from shapely.geometry import Point, Polygon
13
- from shapely.ops import transform
14
-
15
# ==========================================
# AUTHENTICATION
# ==========================================
# os.environ.get never raises for a missing key — it returns None — so the
# original try/except was dead code; a plain assignment is equivalent and
# avoids the bare `except:` that swallowed unrelated errors.
API_KEY = os.environ.get("GOOGLE_API_KEY")
23
# ==========================================
# 1. UNIVERSAL FILTER LISTS (FINAL POLISH)
# ==========================================

# Substrings that disqualify a Places result entirely (schools, doctors,
# industrial/trade services, etc.). Matched against the lowercased,
# apostrophe/period-stripped place name.
_HEALTH_TERMS = [
    "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
    "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
    "ambulance", "transport", "emergency", "veterinary", "vision center",
]
_EDUCATION_TERMS = [
    "school", "university", "college", "academy", "campus", "library", "learning",
    "student", "alum", "education", "institute", "dorm", "residence",
]
_SERVICE_TERMS = [
    "atm", "kiosk", "redbox", "coinme", "fuel", "gas", "repair", "service",
    "collision", "towing", "plumbing", "hvac", "electric", "tree", "lawn",
    "gutter", "cleaning", "storage", "warehouse", "distribution", "mural", "statue",
    "part", "accessories", "hair", "salon", "studio", "barber", "spa", "nail",
]
# Flat list, preserving the original ordering: health, education, services.
UNIVERSAL_BAD_TERMS = _HEALTH_TERMS + _EDUCATION_TERMS + _SERVICE_TERMS
42
-
43
# Substrings that identify in-store departments of big-box anchors
# (e.g. "Target Grocery", "Walmart Pharmacy") so post-processing can drop
# them instead of counting them as separate stores.
DEPARTMENT_TERMS = list(
    (
        "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
        "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
        "money", "bank", "cafe", "bistro", "snack", "food court", "customer service",
        "floral", "flowers", "store on", "tire", "battery", "auto", "lube",
    )
)
51
-
52
# ==========================================
# 2. COMPREHENSIVE STORE LIST
# ==========================================
# Every requested brand variation (Walmart, Ikea, Dicks, BJs, etc.),
# grouped by retail category; the scan loop iterates the flat list.
_BIG_BOX = [
    "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
    "Nordstrom", "Costco", "Sam's Club", "BJ's Wholesale Club",
]
_CLOTHING_DISCOUNT = [
    "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "HomeSense",
    "Ross Dress for Less", "Burlington", "Old Navy", "DSW Designer Shoe Warehouse",
]
_HOME_IMPROVEMENT = ["Home Depot", "Lowe's", "Ace Hardware", "Menards"]
_ELECTRONICS_OFFICE = ["Best Buy", "Office Depot", "OfficeMax", "Staples"]
_FURNITURE = ["IKEA", "Bob's Discount Furniture", "Raymour & Flanigan", "Ashley Furniture"]
_GROCERY = [
    "Kroger", "Meijer", "Whole Foods", "Trader Joe's", "Aldi", "Lidl",
    "Safeway", "Albertsons", "ShopRite", "Stop & Shop", "Publix", "Wegmans",
]
_HOBBY_PET_SPORT = [
    "Dick's Sporting Goods", "Bass Pro Shops", "Cabela's", "REI",
    "Michaels", "Hobby Lobby", "Barnes & Noble",
    "PetSmart", "Petco",
]
SEARCH_LIST = (
    _BIG_BOX + _CLOTHING_DISCOUNT + _HOME_IMPROVEMENT + _ELECTRONICS_OFFICE
    + _FURNITURE + _GROCERY + _HOBBY_PET_SPORT
)
83
-
84
# Default floor counts per brand, used when OSM has no building:levels tag.
# Multi-level department stores default to 2 floors; single-level big boxes to 1.
_TWO_FLOOR_BRANDS = ("Macy's", "JCPenney", "Nordstrom", "Sears", "IKEA")
_ONE_FLOOR_BRANDS = ("Target", "Walmart", "Costco", "Home Depot", "Lowe's",
                     "Barnes & Noble", "Dick's Sporting Goods", "Kohl's")
BRAND_FLOORS = {**{b: 2 for b in _TWO_FLOOR_BRANDS},
                **{b: 1 for b in _ONE_FLOOR_BRANDS}}
89
-
90
# Fallback roof area (m²) per brand, applied when the Solar API returns no
# footprint or an implausible one; brands not listed here fall back to
# 2500 m² at the call site.
BRAND_AVG_AREA = dict([
    ("IKEA", 28000), ("Walmart", 15000), ("Costco", 14000), ("Sam's Club", 13000),
    ("Meijer", 18000), ("Target", 12000), ("Home Depot", 10000), ("Lowe's", 10000),
    ("Kroger", 6000), ("Safeway", 5000), ("Whole Foods", 4000), ("Macy's", 16000),
    ("JCPenney", 10000), ("Sears", 12000), ("Kohl's", 8000), ("Dick's Sporting Goods", 4500),
    ("T.J. Maxx", 2800), ("Marshalls", 2800), ("Ross Dress for Less", 2800),
    ("Old Navy", 1400), ("Barnes & Noble", 2500), ("Best Buy", 3500), ("Staples", 2000),
    ("Office Depot", 2000), ("PetSmart", 1800), ("Petco", 1400), ("Trader Joe's", 1200),
    ("Aldi", 1500), ("Lidl", 1500), ("Ace Hardware", 800), ("DSW Designer Shoe Warehouse", 2000),
    ("Hobby Lobby", 5000), ("BJ's Wholesale Club", 10000),
])
101
-
102
# ==========================================
# HELPER FUNCTIONS
# ==========================================
def load_geodata_to_polygon(file_obj):
    """Read an uploaded KML/KMZ file and return its merged geometry.

    A .kmz is unzipped into a scratch directory and the first .kml inside
    is used; a .kml is read directly. Returns the union of all geometries
    as a single shapely geometry, or None when the file has an unsupported
    extension or cannot be parsed.
    """
    extract_path = "temp_extract"
    # Start from a clean scratch dir so stale KMLs from a prior upload
    # can't be picked up by the glob below.
    if os.path.exists(extract_path):
        shutil.rmtree(extract_path)
    os.makedirs(extract_path)

    target_kml = None
    try:
        # HANDLING KML AND KMZ HERE
        lower_name = file_obj.name.lower()
        if lower_name.endswith('.kmz'):
            with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
            kml_files = glob.glob(extract_path + "/**/*.kml", recursive=True)
            if kml_files:
                target_kml = kml_files[0]
        elif lower_name.endswith('.kml'):
            target_kml = file_obj.name

        if target_kml:
            gdf = gpd.read_file(target_kml)

            # FORCE 2D FIX (Prevents crashes on 3D KMLs)
            def force_2d(geometry):
                # Drop the Z coordinate; downstream ops only use x/y.
                if geometry.has_z:
                    return transform(lambda x, y, z=None: (x, y), geometry)
                return geometry

            gdf.geometry = gdf.geometry.apply(force_2d)
            return gdf.union_all()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; any zip/parse failure is reported to the caller as None.
        return None
    return None
137
-
138
def get_roof_area(lat, lng, api_key):
    """Query the Google Solar API for the roof area of the nearest building.

    Returns the whole-roof area in square meters (float) for the building
    closest to (lat, lng), or None when the API reports an error, the data
    is missing, or the request fails.
    """
    base_url = "https://solar.googleapis.com/v1/buildingInsights:findClosest"
    params = {"location.latitude": lat, "location.longitude": lng, "requiredQuality": "HIGH", "key": api_key}
    try:
        # BUGFIX: a timeout is required — without one a hung connection
        # stalled the entire brand scan indefinitely.
        resp = requests.get(base_url, params=params, timeout=30)
        data = resp.json()
        if 'error' in data:
            return None
        return data.get('solarPotential', {}).get('wholeRoofStats', {}).get('areaMeters2', None)
    except Exception:
        # Narrowed from a bare `except:`; network/JSON failures degrade to
        # None so the caller falls back to per-brand averages.
        return None
148
-
149
def get_osm_physics(lat, lng):
    """Look up building height and floor count from OpenStreetMap.

    Fetches building features within 60 m of (lat, lng), picks the one with
    the largest footprint, and returns (height_m, floors). Either element is
    None when the corresponding OSM tag is absent or unparsable; both are
    None when the lookup fails entirely.
    """
    try:
        tags = {'building': True}
        gdf = ox.features.features_from_point((lat, lng), tags, dist=60)
        if not gdf.empty:
            # Project to EPSG:3857 so .area is in (approximate) m²,
            # then keep the largest footprint as the best match.
            gdf_proj = gdf.to_crs(epsg=3857)
            gdf_proj['area_m2'] = gdf_proj.geometry.area
            best = gdf_proj.sort_values(by='area_m2', ascending=False).iloc[0]

            floors = None
            if 'building:levels' in best and pd.notna(best['building:levels']):
                # The tag may hold ';'-separated values; use the first.
                try:
                    floors = int(float(str(best['building:levels']).split(';')[0]))
                except (ValueError, TypeError):
                    pass  # unparsable tag — leave floors unknown

            height = None
            if 'height' in best and pd.notna(best['height']):
                # The tag is free text, e.g. "12 m"; strip the unit suffix.
                try:
                    height = float(str(best['height']).replace('m', '').strip())
                except (ValueError, TypeError):
                    pass  # unparsable tag — leave height unknown
            return height, floors
    except Exception:
        # Narrowed from a bare `except:`; OSM/network failures degrade to
        # (None, None) so the caller falls back to brand defaults.
        pass
    return None, None
171
-
172
# ==========================================
# MAIN LOGIC
# ==========================================
def process_data(file_obj):
    """Scan the uploaded polygon for big-box retail and export a CSV.

    Gradio generator: yields (status_message, file_path_or_None) pairs so
    the UI streams progress; the final yield carries the CSV path.
    """
    if not API_KEY:
        yield "❌ API Key not found! Set GOOGLE_API_KEY in Secrets.", None
        return

    if file_obj is None:
        yield "❌ Please upload a file.", None
        return

    yield "📂 Loading Polygon...", None
    polygon = load_geodata_to_polygon(file_obj)

    if not polygon:
        yield "❌ Failed to read KML/KMZ file.", None
        return

    # --- CHECK AREA LIMIT HERE (250,000,000 sq m) ---
    # EPSG:6933 is an equal-area projection, so .area yields true sq meters.
    gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
    gs_proj = gs.to_crs(epsg=6933)
    area_sq_meters = gs_proj.area.iloc[0]
    if area_sq_meters > 250_000_000:
        yield f"⚠️ AREA TOO LARGE: {area_sq_meters:,.0f} sq m. (Limit: 250M).", None
        return

    gmaps = googlemaps.Client(key=API_KEY)
    results = []
    seen_ids = set()
    total_brands = len(SEARCH_LIST)

    # 1. SEARCH LOOP
    for i, brand in enumerate(SEARCH_LIST):
        yield f"🔍 Scanning Brand {i+1}/{total_brands}: {brand}...", None

        try:
            places = gmaps.places_nearby(
                location=(polygon.centroid.y, polygon.centroid.x),
                radius=10000,
                keyword=brand
            )

            all_results = places.get('results', [])
            # The Places API paginates; next_page_token needs ~2s to activate.
            while 'next_page_token' in places:
                time.sleep(2)
                places = gmaps.places_nearby(
                    location=(polygon.centroid.y, polygon.centroid.x),
                    radius=10000,
                    keyword=brand,
                    page_token=places['next_page_token']
                )
                all_results.extend(places.get('results', []))

            for p in all_results:
                pid = p.get('place_id')
                if pid in seen_ids:
                    continue

                name = p.get('name')
                name_clean = name.lower().replace("'", "").replace(".", "")
                brand_clean = brand.lower().replace("'", "").replace(".", "")

                # A. UNIVERSAL NAME CHECK
                # Both sides are apostrophe/period-stripped, so special cases
                # must compare cleaned forms. BUGFIX: the original tested
                # `"t.j. maxx" in name_clean`, which can never match a
                # period-stripped name; "tj maxx" is the cleaned form. (The
                # old `brand_clean == "lowe"` branch was unreachable —
                # "Lowe's" cleans to "lowes" — and has been dropped.)
                if brand_clean not in name_clean:
                    if brand_clean == "tjx" and "tj maxx" in name_clean:
                        pass
                    else:
                        continue

                # B. UNIVERSAL BAD WORD FILTER (Strict)
                if any(term in name_clean for term in UNIVERSAL_BAD_TERMS):
                    continue

                lat = p['geometry']['location']['lat']
                lng = p['geometry']['location']['lng']

                # C. STRICT CONTAINMENT — Point takes (x=lng, y=lat).
                if not polygon.contains(Point(lng, lat)):
                    continue
                seen_ids.add(pid)

                # FETCH DATA
                roof_area = get_roof_area(lat, lng, API_KEY)
                height, floors = get_osm_physics(lat, lng)

                # DATA FILLING
                source_note = "SolarAPI"
                if roof_area is None:
                    roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
                    source_note = "Brand_Avg (Missing)"
                else:
                    # Universal Mall Logic: a huge footprint usually means the
                    # Solar API matched the whole mall, not the store itself.
                    if roof_area > 30000 and brand not in ["IKEA", "Costco", "Meijer", "Sam's Club", "Walmart"]:
                        roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
                        source_note = "Brand_Avg (Mall detected)"
                    elif roof_area < 500 and brand not in ["Ace Hardware", "Trader Joe's"]:
                        roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
                        source_note = "Brand_Avg (Too Small detected)"

                if floors is None:
                    floors = BRAND_FLOORS.get(brand, 1)
                if height is None:
                    height = floors * 6.0  # assume ~6 m per retail floor

                results.append({
                    'Name': name,
                    'Brand': brand,
                    'Latitude': lat,
                    'Longitude': lng,
                    'Height_m': round(height, 2),
                    'Num_Floors': int(floors),
                    'Area_sqm': round(roof_area, 2),
                    'Data_Source': source_note
                })
        except Exception:
            # Narrowed from a bare `except: pass` — one brand's API failure
            # is still skipped best-effort, but KeyboardInterrupt/SystemExit
            # now propagate instead of being swallowed.
            pass

    if not results:
        yield "❌ No stores found in this area.", None
        return

    # ==========================================
    # 2. UNIVERSAL POST-PROCESSING
    # ==========================================
    yield "🧹 Performing Universal Deduplication...", None

    df = pd.DataFrame(results)

    # A. Remove Departments (Target Grocery, Meijer Deli, Kroger Floral)
    df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]

    # B. Spatial Deduplication (Group by Brand + Location)
    # Rounding to 4 decimals creates a grid ID of approx 11 meters.
    df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)

    # Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
    df['Name_Len'] = df['Name'].str.len()
    df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])

    # Drop duplicates, keeping the shortest name
    df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
    df = df.drop(columns=['Loc_ID', 'Name_Len'])

    # C. Universal Strip Mall Splitter: rows sharing an identical Area_sqm
    # are assumed to be tenants of the same Solar-API building footprint,
    # so large shared footprints are divided evenly among them.
    df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')

    df['Final_Area_sqm'] = df.apply(
        lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
        axis=1
    )

    df['Data_Source'] = df.apply(
        lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
        axis=1
    )

    # Clean Export
    final_cols = ['Name', 'Brand', 'Latitude', 'Longitude', 'Height_m', 'Num_Floors', 'Final_Area_sqm', 'Data_Source']
    df_final = df[final_cols].rename(columns={'Final_Area_sqm': 'Area_sqm'})

    output_path = "Universal_Building_Inventory.csv"
    df_final.to_csv(output_path, index=False)

    yield f"✅ Success! Found {len(df_final)} unique commercial assets.", output_path
331
-
332
# ==========================================
# GRADIO INTERFACE
# ==========================================
iface = gr.Interface(
    fn=process_data,
    inputs=gr.File(label="Upload Polygon (KML/KMZ) - - Limit 250,000,000 Sq meters area"),
    outputs=[
        gr.Textbox(label="Status Log"),
        gr.File(label="Download CSV")
    ],
    title="🌎 Universal Commercial Asset Scanner",
    description="Upload any KML/KMZ. Scans for 50+ Big Box Brands, strip malls. Using Places API, Solar API and OpenStreetMaps API"
)

# Launch only when executed as a script (which is how HF Spaces runs
# app.py); the guard keeps the module importable without side effects.
if __name__ == "__main__":
    iface.launch()