forever-sheikh committed on
Commit
87455cb
·
verified ·
1 Parent(s): 33a99fd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -0
app.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import glob
import logging
import os
import shutil
import tempfile
import time
import zipfile

import geopandas as gpd
import googlemaps
import gradio as gr
import osmnx as ox
import pandas as pd
import requests
from shapely.geometry import Point, Polygon
from shapely.ops import transform
14
+
15
+ # ==========================================
16
+ # AUTHENTICATION
17
+ # ==========================================
18
# Google API key used for the Places and Solar requests. os.environ.get()
# returns None for a missing variable instead of raising, so no exception
# handling is needed; callers treat a missing or empty key as "not set".
API_KEY = os.environ.get("GOOGLE_API_KEY")
22
+
23
+ # ==========================================
24
+ # HELPER FUNCTIONS
25
+ # ==========================================
26
def load_geodata_to_polygon(file_obj):
    """Read an uploaded KML/KMZ file and merge its features into one 2D geometry.

    Args:
        file_obj: The uploaded file, given either as an object whose ``name``
            attribute is the path on disk or as the path itself.

    Returns:
        The union of every feature geometry read from the file, with Z
        coordinates dropped, or ``None`` when the input is not a ``.kml`` /
        ``.kmz`` path, the archive contains no ``.kml`` member, or the file
        cannot be read.
    """
    path = getattr(file_obj, "name", file_obj)
    if not isinstance(path, (str, os.PathLike)):
        return None
    path = os.fspath(path)
    suffix = os.path.splitext(path)[1].lower()
    if suffix not in (".kml", ".kmz"):
        return None

    def force_2d(geometry):
        # KML coordinates may carry an altitude; drop it so every later
        # operation works on plain 2D shapes. Null geometries pass through.
        if geometry is None or not geometry.has_z:
            return geometry
        return transform(lambda x, y, z=None: (x, y), geometry)

    try:
        # A per-call temporary directory keeps concurrent uploads from
        # overwriting each other and is removed automatically on exit.
        with tempfile.TemporaryDirectory(prefix="kmz_extract_") as extract_path:
            target_kml = path
            if suffix == ".kmz":
                with zipfile.ZipFile(path, "r") as archive:
                    kml_members = [
                        member for member in archive.namelist()
                        if member.lower().endswith(".kml")
                    ]
                    if not kml_members:
                        return None
                    # Only the first KML is needed; other archive members
                    # (icons, overlays) are never read, so skip them.
                    target_kml = archive.extract(kml_members[0], extract_path)

            gdf = gpd.read_file(target_kml)
            gdf.geometry = gdf.geometry.apply(force_2d)
            return gdf.union_all()
    except Exception:
        # Parsing is best-effort: any unreadable upload is reported to the
        # caller as None, but the cause is logged rather than discarded.
        logging.getLogger(__name__).exception(
            "Could not read geodata from %s", os.path.basename(path)
        )
        return None
58
+
59
def get_roof_area(lat, lng, api_key, timeout=10):
    """Look up the whole-roof area of the building closest to a coordinate.

    Queries the Google Solar API ``buildingInsights:findClosest`` endpoint,
    requesting ``HIGH`` quality.

    Args:
        lat: Latitude of the query point.
        lng: Longitude of the query point.
        api_key: Google API key sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``solarPotential.wholeRoofStats.areaMeters2`` value from the
        response, or ``None`` when the request fails, the response is not
        valid JSON, the API reports an error, or the field is missing.
    """
    base_url = "https://solar.googleapis.com/v1/buildingInsights:findClosest"
    params = {
        "location.latitude": lat,
        "location.longitude": lng,
        "requiredQuality": "HIGH",
        "key": api_key,
    }
    try:
        # Without a timeout a stalled connection would block the scan forever.
        resp = requests.get(base_url, params=params, timeout=timeout)
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # Log only the exception type: the exception text can include the
        # request URL, which carries the API key.
        logging.getLogger(__name__).warning(
            "Solar API request failed (%s)", type(exc).__name__
        )
        return None

    if not isinstance(data, dict) or "error" in data:
        return None

    # Walk the nested payload defensively; any level may be absent or null.
    potential = data.get("solarPotential")
    stats = potential.get("wholeRoofStats") if isinstance(potential, dict) else None
    return stats.get("areaMeters2") if isinstance(stats, dict) else None
74
+
75
def _parse_levels(raw):
    """Parse a ``building:levels`` tag value such as "2", "2.0" or "2;3"."""
    if raw is None:
        return None
    try:
        # Multi-valued tags are separated by ';' -- use the first value.
        return int(float(str(raw).split(';')[0]))
    except (ValueError, OverflowError):
        return None


def _parse_height(raw):
    """Parse a ``height`` tag value such as "12" or "12 m" into a float."""
    if raw is None:
        return None
    try:
        return float(str(raw).replace('m', '').strip())
    except ValueError:
        return None


def get_osm_physics(lat, lng):
    """Fetch height and floor count for a building from OpenStreetMap.

    Requests all ``building`` features within 60 of the point (the ``dist``
    argument), picks the one with the largest footprint, and reads its
    ``height`` and ``building:levels`` tags.

    Args:
        lat: Latitude of the query point.
        lng: Longitude of the query point.

    Returns:
        A ``(height, floors)`` tuple. Either element is ``None`` when the tag
        is missing or unparseable; both are ``None`` when no building is
        found or the lookup fails.
    """
    try:
        gdf = ox.features.features_from_point((lat, lng), {'building': True}, dist=60)
        if gdf.empty:
            return None, None

        # Project before measuring so footprints are compared in planar
        # units; the area is only used to rank candidates.
        gdf_proj = gdf.to_crs(epsg=3857)
        gdf_proj['area_m2'] = gdf_proj.geometry.area
        best = gdf_proj.sort_values(by='area_m2', ascending=False).iloc[0]

        raw_levels = best['building:levels'] if 'building:levels' in best else None
        if raw_levels is not None and not pd.notna(raw_levels):
            raw_levels = None

        raw_height = best['height'] if 'height' in best else None
        if raw_height is not None and not pd.notna(raw_height):
            raw_height = None

        return _parse_height(raw_height), _parse_levels(raw_levels)
    except Exception as exc:
        # OSM enrichment is optional: a failed lookup must not stop the scan,
        # so report "unknown" and leave a trace in the log.
        logging.getLogger(__name__).debug(
            "OSM lookup failed at (%s, %s): %s", lat, lng, exc
        )
        return None, None
97
+
98
# DATA CONSTANTS

# Floor count assumed for a brand when no OSM level data is available.
BRAND_FLOORS = {
    "Barnes & Noble": 1,
    "Costco": 1,
    "Dick's Sporting Goods": 1,
    "Home Depot": 1,
    "IKEA": 2,
    "JCPenney": 2,
    "Kohl's": 1,
    "Lowe's": 1,
    "Macy's": 2,
    "Nordstrom": 2,
    "Sears": 2,
    "Target": 1,
    "Walmart": 1,
}

# Fallback area per brand, used when no measured roof area is usable.
BRAND_AVG_AREA = {
    "Ace Hardware": 800,
    "Aldi": 1500,
    "Barnes & Noble": 2500,
    "Best Buy": 3500,
    "Costco": 14000,
    "DSW Designer Shoe Warehouse": 2000,
    "Dick's Sporting Goods": 4500,
    "Home Depot": 10000,
    "IKEA": 28000,
    "JCPenney": 10000,
    "Kohl's": 8000,
    "Kroger": 6000,
    "Lidl": 1500,
    "Lowe's": 10000,
    "Macy's": 16000,
    "Marshalls": 2800,
    "Meijer": 18000,
    "Office Depot": 2000,
    "Old Navy": 1400,
    "PetSmart": 1800,
    "Petco": 1400,
    "Ross Dress for Less": 2800,
    "Safeway": 5000,
    "Sam's Club": 13000,
    "Sears": 12000,
    "Staples": 2000,
    "T.J. Maxx": 2800,
    "Target": 12000,
    "Trader Joe's": 1200,
    "Walmart": 15000,
    "Whole Foods": 4000,
}

# Keywords searched in order. The order is significant: a place is recorded
# under the first keyword that matches it, so do not sort this list.
SEARCH_LIST = [
    # General merchandise / department stores
    "Walmart",
    "Target",
    "Kmart",
    "Sears",
    "Kohl's",
    "Macy's",
    "JCPenney",
    # Off-price
    "TJX",
    "TJX Companies",
    "T.J. Maxx",
    "Marshalls",
    "HomeGoods",
    "HomeSense",
    "Ross",
    "Ross Dress for Less",
    "Burlington",
    "Dick's Sporting Goods",
    # Grocery, home improvement, electronics
    "Albertsons",
    "Safeway",
    "Home Depot",
    "Lowe's",
    "Best Buy",
    # Furniture
    "IKEA",
    "Bob's Furniture",
    "Bob's Discount Furniture",
    "Raymour & Flanigan",
    # Books / office
    "Barnes & Noble",
    "Office Depot",
    "OfficeMax",
    "Staples",
    "Lowe",
    # Pets, grocery, warehouse clubs
    "PetSmart",
    "Petco",
    "Kroger",
    "Meijer",
    "Costco",
    "BJ's Wholesale Club",
    "Sam's Club",
    "Whole Foods",
    "ShopRite",
    "Stop & Shop",
    "Trader Joe's",
    # Crafts, discount grocery, apparel
    "Michaels",
    "Lidl",
    "Aldi",
    "DSW Designer Shoe Warehouse",
    "Old Navy",
    # Hardware, hobby, alternate spellings
    "Ace",
    "Ace Hardware",
    "Hobby Lobby",
    "Trader Joes",
]
128
+
129
+ # ==========================================
130
+ # MAIN LOGIC WITH GENERATOR (YIELD)
131
+ # ==========================================
132
# Radius in metres of each Places Nearby Search around the polygon centroid.
# NOTE(review): an elongated polygon, or one near the area limit, can extend
# more than 10 km from its centroid; stores out there are never returned.
_SEARCH_RADIUS_M = 10000
# Largest polygon accepted, in square metres.
_AREA_LIMIT_SQ_M = 250_000_000
# Pause before requesting the next results page, and how many extra pages to
# follow after the first one.
_NEXT_PAGE_DELAY_S = 2
_MAX_EXTRA_PAGES = 2
# Area used when a brand has no entry in BRAND_AVG_AREA.
_DEFAULT_AREA_SQM = 2500.0
# Height assumed per floor when OSM has no height tag.
_METRES_PER_FLOOR = 6.0
# Measured roofs above/below these bounds are treated as implausible unless
# the brand is known to be unusually large/small.
_MALL_AREA_SQM = 30000
_MIN_AREA_SQM = 500
_LARGE_FORMAT_BRANDS = ("IKEA", "Costco", "Meijer", "Sam's Club", "Walmart")
_SMALL_FORMAT_BRANDS = ("Ace Hardware", "Trader Joe's", "GameStop")
# Substrings (matched against the normalized place name) that mark a result
# as something other than the store itself.
# NOTE(review): "dr." can never match because periods are stripped from the
# name before the comparison; kept unchanged pending a decision on how
# doctors' offices should be detected.
_BAD_NAME_TERMS = (
    "atm", "redbox", "kiosk", "coinme", "gas", "fuel", "lcsw", "dr.", "dds",
    "hair", "salon", "studio", "tire", "repair",
)
# Sub-department keywords removed from the final table.
_SUBDEPARTMENT_KEYWORDS = (
    'Mobile', 'Salon', 'Floral', 'Bakery', 'Pharmacy', 'Optical',
    'Geek Squad', 'Photo', 'Tire', 'Vision',
)
# Normalized search keyword -> normalized store names that also count as a
# match even though they do not contain the keyword itself.
_BRAND_NAME_ALIASES = {
    "tjx": ("tj maxx",),
}


def _normalize_name(text):
    """Lower-case *text* and drop apostrophes and periods for comparison."""
    return text.lower().replace("'", "").replace(".", "")


def _matches_brand(name_clean, brand):
    """Return True if the normalized place name belongs to *brand*."""
    brand_clean = _normalize_name(brand)
    if brand_clean in name_clean:
        return True
    # Aliases are stored already normalized, so they compare correctly
    # against a name whose punctuation has been stripped.
    return any(alias in name_clean for alias in _BRAND_NAME_ALIASES.get(brand_clean, ()))


def _fetch_places(gmaps, location, brand):
    """Return all Nearby Search results for *brand*, following page tokens."""
    places = gmaps.places_nearby(
        location=location, radius=_SEARCH_RADIUS_M, keyword=brand
    )
    found = list(places.get('results', []))
    for _ in range(_MAX_EXTRA_PAGES):
        token = places.get('next_page_token')
        if not token:
            break
        # The token needs a moment before the service accepts it.
        time.sleep(_NEXT_PAGE_DELAY_S)
        try:
            places = gmaps.places_nearby(
                location=location,
                radius=_SEARCH_RADIUS_M,
                keyword=brand,
                page_token=token,
            )
        except Exception:
            # Keep the pages already fetched instead of losing the brand.
            logging.getLogger(__name__).warning(
                "Could not fetch the next results page for %r", brand, exc_info=True
            )
            break
        found.extend(places.get('results', []))
    return found


def _screen_place(place, brand, polygon):
    """Return ``(name, lat, lng)`` if *place* is a *brand* store inside
    *polygon*, otherwise ``None``."""
    name = place.get('name')
    if not name:
        return None

    name_clean = _normalize_name(name)
    if not _matches_brand(name_clean, brand):
        return None
    if any(term in name_clean for term in _BAD_NAME_TERMS):
        return None

    location = (place.get('geometry') or {}).get('location') or {}
    lat = location.get('lat')
    lng = location.get('lng')
    if lat is None or lng is None:
        return None

    # Shapely points are (x, y), i.e. (longitude, latitude).
    if not polygon.contains(Point(lng, lat)):
        return None
    return name, lat, lng


def _resolve_roof_area(roof_area, brand):
    """Return ``(area, source_note)``, replacing a missing or implausible
    measured roof area with the brand average."""
    fallback = BRAND_AVG_AREA.get(brand, _DEFAULT_AREA_SQM)
    if roof_area is None:
        return fallback, "Brand_Avg (Missing)"
    if roof_area > _MALL_AREA_SQM and brand not in _LARGE_FORMAT_BRANDS:
        return fallback, "Brand_Avg (Mall detected)"
    if roof_area < _MIN_AREA_SQM and brand not in _SMALL_FORMAT_BRANDS:
        return fallback, "Brand_Avg (Too Small detected)"
    return roof_area, "SolarAPI"


def process_data(file_obj):
    """Build a store inventory CSV for the area described by a KML/KMZ upload.

    This is a generator: it yields ``(status_message, csv_path)`` tuples.
    ``csv_path`` is ``None`` for every progress or error message and is the
    path of the written CSV only in the final success message.

    Steps: load the polygon, reject it if larger than the area limit, search
    Google Places for every keyword in ``SEARCH_LIST`` around the polygon
    centroid, keep results whose name matches the keyword and whose location
    lies inside the polygon, enrich each with roof area and OSM height/floors
    (falling back to brand averages), drop sub-department listings, and write
    the table to ``Building_Inventory.csv``.

    Args:
        file_obj: The uploaded KML/KMZ file, or ``None`` if nothing was
            uploaded.
    """
    log = logging.getLogger(__name__)

    if not API_KEY:
        yield "❌ API Key not found! Set GOOGLE_API_KEY in Secrets.", None
        return

    if file_obj is None:
        yield "❌ Please upload a file.", None
        return

    yield "📂 Loading Polygon...", None
    polygon = load_geodata_to_polygon(file_obj)

    if polygon is None or polygon.is_empty:
        yield "❌ Failed to read KML/KMZ file.", None
        return

    # --- UNIVERSAL AREA LIMIT CHECK ---
    # Measured in an equal-area projection. If the measurement itself fails
    # the check is skipped rather than blocking the run.
    try:
        gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
        area_sq_meters = gs.to_crs(epsg=6933).area.iloc[0]
    except Exception:
        log.warning("Could not measure polygon area; skipping the size limit", exc_info=True)
        area_sq_meters = None

    limit_sq_meters = _AREA_LIMIT_SQ_M
    if area_sq_meters is not None and area_sq_meters > limit_sq_meters:
        yield f"⚠️ AREA TOO LARGE: {area_sq_meters:,.0f} sq m. (Limit: {limit_sq_meters:,.0f}). Upload a smaller file.", None
        return

    try:
        gmaps = googlemaps.Client(key=API_KEY)
    except ValueError as exc:
        yield f"❌ Invalid Google API key: {exc}", None
        return

    results = []
    seen_ids = set()
    failed_brands = []
    total_brands = len(SEARCH_LIST)

    # The search centre is the same for every brand; compute it once.
    centroid = polygon.centroid
    center = (centroid.y, centroid.x)

    # LOOP THROUGH BRANDS
    for i, brand in enumerate(SEARCH_LIST, start=1):
        yield f"🔍 Scanning Brand {i}/{total_brands}: {brand}...", None

        try:
            candidates = _fetch_places(gmaps, center, brand)
        except Exception:
            # One failing search must not abort the scan, but it must not be
            # invisible either: remember it for the final status message.
            log.exception("Places search failed for %r", brand)
            failed_brands.append(brand)
            continue

        for place in candidates:
            pid = place.get('place_id')
            if pid is not None and pid in seen_ids:
                continue

            screened = _screen_place(place, brand, polygon)
            if screened is None:
                continue
            name, lat, lng = screened
            if pid is not None:
                seen_ids.add(pid)

            # Get Data
            roof_area = get_roof_area(lat, lng, API_KEY)
            height, floors = get_osm_physics(lat, lng)

            roof_area, source_note = _resolve_roof_area(roof_area, brand)
            if floors is None:
                floors = BRAND_FLOORS.get(brand, 1)
            if height is None:
                height = floors * _METRES_PER_FLOOR

            results.append({
                'Name': name, 'Brand': brand, 'Latitude': lat, 'Longitude': lng,
                'Height_m': round(height, 2), 'Num_Floors': int(floors),
                'Area_sqm': round(roof_area, 2), 'Data_Source': source_note
            })

    failure_note = ""
    if failed_brands:
        failure_note = f" ({len(failed_brands)} of {total_brands} brand searches failed; see logs.)"

    if not results:
        yield "❌ No stores found in this area." + failure_note, None
        return

    df = pd.DataFrame(results)

    # --- FINAL CLEANUP (Sub-departments) ---
    # Double check to remove things like "Walmart Vision Center"
    df = df[~df['Name'].str.contains('|'.join(_SUBDEPARTMENT_KEYWORDS), case=False, na=False)]

    if df.empty:
        # Everything found was a sub-department listing.
        yield "❌ No stores found in this area." + failure_note, None
        return

    # NOTE(review): fixed file name in the working directory; concurrent
    # requests overwrite each other's output.
    output_path = "Building_Inventory.csv"
    df.to_csv(output_path, index=False)

    yield f"✅ Success! Found {len(df)} stores." + failure_note, output_path
266
+
267
+ # ==========================================
268
+ # GRADIO INTERFACE
269
+ # ==========================================
270
# Web UI: one file upload in, a status text box and a downloadable CSV out.
# process_data is a generator, so it supplies a (status, file) pair per yield.
iface = gr.Interface(
    fn=process_data,
    inputs=gr.File(label="Upload Polygon (Limit: 250,000,000 sq m)"),
    outputs=[
        gr.Textbox(label="Status Log"),
        gr.File(label="Download CSV"),
    ],
    title="🏙️ Commercial Building Inventory Generator",
    # Both .kml and .kmz uploads are handled by the loader.
    description="Upload a KML or KMZ file. The tool will scan for major Big Box brands, check Solar API for area, and OpenStreetMap for height/floors.",
)

iface.launch()