forever-sheikh committed on
Commit
d09b0df
·
verified ·
1 Parent(s): c378f7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -88
app.py CHANGED
@@ -20,6 +20,55 @@ try:
20
  except:
21
  API_KEY = None
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # ==========================================
24
  # HELPER FUNCTIONS
25
  # ==========================================
@@ -43,8 +92,6 @@ def load_geodata_to_polygon(file_obj):
43
  if target_kml:
44
  gdf = gpd.read_file(target_kml)
45
 
46
- # --- UNIVERSAL FIX: FORCE 2D ---
47
- # This ensures ANY KML with height data works correctly
48
  def force_2d(geometry):
49
  if geometry.has_z:
50
  return transform(lambda x, y, z=None: (x, y), geometry)
@@ -58,12 +105,7 @@ def load_geodata_to_polygon(file_obj):
58
 
59
  def get_roof_area(lat, lng, api_key):
60
  base_url = "https://solar.googleapis.com/v1/buildingInsights:findClosest"
61
- params = {
62
- "location.latitude": lat,
63
- "location.longitude": lng,
64
- "requiredQuality": "HIGH",
65
- "key": api_key
66
- }
67
  try:
68
  resp = requests.get(base_url, params=params)
69
  data = resp.json()
@@ -95,45 +137,14 @@ def get_osm_physics(lat, lng):
95
  pass
96
  return None, None
97
 
98
- # DATA CONSTANTS
99
- BRAND_FLOORS = {
100
- "Macy's": 2, "JCPenney": 2, "Nordstrom": 2, "Sears": 2, "IKEA": 2,
101
- "Target": 1, "Walmart": 1, "Costco": 1, "Home Depot": 1, "Lowe's": 1,
102
- "Barnes & Noble": 1, "Dick's Sporting Goods": 1, "Kohl's": 1
103
- }
104
-
105
- BRAND_AVG_AREA = {
106
- "IKEA": 28000, "Walmart": 15000, "Costco": 14000, "Sam's Club": 13000,
107
- "Meijer": 18000, "Target": 12000, "Home Depot": 10000, "Lowe's": 10000,
108
- "Kroger": 6000, "Safeway": 5000, "Whole Foods": 4000, "Macy's": 16000,
109
- "JCPenney": 10000, "Sears": 12000, "Kohl's": 8000, "Dick's Sporting Goods": 4500,
110
- "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
111
- "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
112
- "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
113
- "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000
114
- }
115
-
116
- SEARCH_LIST = [
117
- "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
118
- "TJX", "TJX Companies", "T.J. Maxx", "Marshalls", "HomeGoods", "HomeSense",
119
- "Ross", "Ross Dress for Less", "Burlington", "Dick's Sporting Goods",
120
- "Albertsons", "Safeway", "Home Depot", "Lowe's", "Best Buy",
121
- "IKEA", "Bob's Furniture", "Bob's Discount Furniture", "Raymour & Flanigan",
122
- "Barnes & Noble", "Office Depot", "OfficeMax", "Staples", "Lowe",
123
- "PetSmart", "Petco", "Kroger", "Meijer", "Costco", "BJ's Wholesale Club",
124
- "Sam's Club", "Whole Foods", "ShopRite", "Stop & Shop", "Trader Joe's",
125
- "Michaels", "Lidl", "Aldi", "DSW Designer Shoe Warehouse", "Old Navy",
126
- "Ace", "Ace Hardware", "Hobby Lobby", "Trader Joes"
127
- ]
128
-
129
  # ==========================================
130
- # MAIN LOGIC WITH GENERATOR (YIELD)
131
  # ==========================================
132
  def process_data(file_obj):
133
  if not API_KEY:
134
  yield "❌ API Key not found! Set GOOGLE_API_KEY in Secrets.", None
135
  return
136
-
137
  if file_obj is None:
138
  yield "❌ Please upload a file.", None
139
  return
@@ -145,31 +156,24 @@ def process_data(file_obj):
145
  yield "❌ Failed to read KML/KMZ file.", None
146
  return
147
 
148
- # --- UNIVERSAL AREA LIMIT CHECK ---
149
- try:
150
- gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
151
- gs_proj = gs.to_crs(epsg=6933)
152
- area_sq_meters = gs_proj.area.iloc[0]
153
- limit_sq_meters = 250_000_000
154
-
155
- if area_sq_meters > limit_sq_meters:
156
- yield f"⚠️ AREA TOO LARGE: {area_sq_meters:,.0f} sq m. (Limit: {limit_sq_meters:,.0f}). Upload a smaller file.", None
157
- return
158
- except:
159
- pass
160
 
161
  gmaps = googlemaps.Client(key=API_KEY)
162
  results = []
163
  seen_ids = set()
164
  total_brands = len(SEARCH_LIST)
165
 
166
- # LOOP THROUGH BRANDS
167
  for i, brand in enumerate(SEARCH_LIST):
168
  yield f"πŸ” Scanning Brand {i+1}/{total_brands}: {brand}...", None
169
 
170
  try:
171
- # --- 1. DEEP SEARCH (Pagination enabled) ---
172
- # 10km radius + 3 Pages of results ensures we don't miss local stores
173
  places = gmaps.places_nearby(
174
  location=(polygon.centroid.y, polygon.centroid.x),
175
  radius=10000,
@@ -192,48 +196,40 @@ def process_data(file_obj):
192
  if pid in seen_ids: continue
193
 
194
  name = p.get('name')
195
-
196
- # --- 2. UNIVERSAL NAME VERIFICATION ---
197
- # Check: Is the brand name actually inside the store name?
198
- # This prevents "Meijer" showing up when searching for "Walmart"
199
-
200
- # Normalize strings (remove case, apostrophes, periods)
201
  name_clean = name.lower().replace("'", "").replace(".", "")
202
  brand_clean = brand.lower().replace("'", "").replace(".", "")
203
 
 
204
  if brand_clean not in name_clean:
205
- # Exceptions for tricky names (TJX/Lowe)
206
  if brand_clean == "tjx" and "t.j. maxx" in name_clean: pass
207
  elif brand_clean == "lowe" and "lowe's" in name_clean: pass
208
- else: continue # Reject the result
209
-
210
- # --- 3. BAD KEYWORD FILTER ---
211
- # Filters out ATMs, Doctors, Fuel, Vision Centers, etc.
212
- bad_terms = ["atm", "redbox", "kiosk", "coinme", "gas", "fuel", "lcsw", "dr.", "dds", "hair", "salon", "studio", "tire", "repair"]
213
- if any(term in name_clean for term in bad_terms): continue
214
-
215
  lat = p['geometry']['location']['lat']
216
  lng = p['geometry']['location']['lng']
217
 
218
- # --- 4. STRICT CONTAINMENT ---
219
- # Only keep if strictly inside the KML polygon
220
  if not polygon.contains(Point(lng, lat)): continue
221
  seen_ids.add(pid)
222
 
223
- # Get Data
224
  roof_area = get_roof_area(lat, lng, API_KEY)
225
  height, floors = get_osm_physics(lat, lng)
226
 
227
- # Logic
228
  source_note = "SolarAPI"
229
  if roof_area is None:
230
  roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
231
  source_note = "Brand_Avg (Missing)"
232
  else:
 
233
  if roof_area > 30000 and brand not in ["IKEA", "Costco", "Meijer", "Sam's Club", "Walmart"]:
234
  roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
235
  source_note = "Brand_Avg (Mall detected)"
236
- elif roof_area < 500 and brand not in ["Ace Hardware", "Trader Joe's", "GameStop"]:
237
  roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
238
  source_note = "Brand_Avg (Too Small detected)"
239
 
@@ -241,9 +237,14 @@ def process_data(file_obj):
241
  if height is None: height = floors * 6.0
242
 
243
  results.append({
244
- 'Name': name, 'Brand': brand, 'Latitude': lat, 'Longitude': lng,
245
- 'Height_m': round(height, 2), 'Num_Floors': int(floors),
246
- 'Area_sqm': round(roof_area, 2), 'Data_Source': source_note
 
 
 
 
 
247
  })
248
  except:
249
  pass
@@ -252,30 +253,65 @@ def process_data(file_obj):
252
  yield "❌ No stores found in this area.", None
253
  return
254
 
 
 
 
 
 
255
  df = pd.DataFrame(results)
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- # --- FINAL CLEANUP (Sub-departments) ---
258
- # Double check to remove things like "Walmart Vision Center"
259
- bad_keywords = ['Mobile', 'Salon', 'Floral', 'Bakery', 'Pharmacy', 'Optical', 'Geek Squad', 'Photo', 'Tire', 'Vision']
260
- df = df[~df['Name'].str.contains('|'.join(bad_keywords), case=False, na=False)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
- output_path = "Building_Inventory.csv"
263
- df.to_csv(output_path, index=False)
264
 
265
- yield f"βœ… Success! Found {len(df)} stores.", output_path
266
 
267
  # ==========================================
268
  # GRADIO INTERFACE
269
  # ==========================================
270
  iface = gr.Interface(
271
  fn=process_data,
272
- inputs=gr.File(label="Upload Polygon - - (Limit: 250,000,000 sq m)"),
273
  outputs=[
274
  gr.Textbox(label="Status Log"),
275
  gr.File(label="Download CSV")
276
  ],
277
- title="πŸ™οΈ Commercial Building Inventory Generator (Test - Check- Nokia)",
278
- description="Upload a KMZ file. The tool will scan for 50+ major Big Box brands, check Solar API, Places API, OpenStreetMap for Area + height/floors."
279
  )
280
 
281
  iface.launch()
 
20
  except:
21
  API_KEY = None
22
 
23
+ # ==========================================
24
+ # 1. UNIVERSAL FILTER LISTS (THE FIX)
25
+ # ==========================================
26
+ # Filters out Schools, Doctors, Industrial services, etc.
27
+ UNIVERSAL_BAD_TERMS = [
28
+ "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
29
+ "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
30
+ "ambulance", "transport", "emergency", "veterinary", "vision center",
31
+ "school", "university", "college", "academy", "campus", "library", "learning",
32
+ "student", "alum", "education", "institute", "dorm", "residence",
33
+ "atm", "kiosk", "redbox", "coinme", "fuel", "gas", "repair", "service",
34
+ "collision", "towing", "plumbing", "hvac", "electric", "tree", "lawn",
35
+ "gutter", "cleaning", "storage", "warehouse", "distribution", "mural", "statue",
36
+ "part", "accessories"
37
+ ]
38
+
39
+ # Filters out departments inside Big Box stores
40
+ DEPARTMENT_TERMS = [
41
+ "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
42
+ "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
43
+ "money", "bank", "cafe", "bistro", "snack", "food court", "customer service"
44
+ ]
45
+
46
+ BRAND_FLOORS = {
47
+ "Macy's": 2, "JCPenney": 2, "Nordstrom": 2, "Sears": 2, "IKEA": 2,
48
+ "Target": 1, "Walmart": 1, "Costco": 1, "Home Depot": 1, "Lowe's": 1,
49
+ "Barnes & Noble": 1, "Dick's Sporting Goods": 1, "Kohl's": 1
50
+ }
51
+
52
+ BRAND_AVG_AREA = {
53
+ "IKEA": 28000, "Walmart": 15000, "Costco": 14000, "Sam's Club": 13000,
54
+ "Meijer": 18000, "Target": 12000, "Home Depot": 10000, "Lowe's": 10000,
55
+ "Kroger": 6000, "Safeway": 5000, "Whole Foods": 4000, "Macy's": 16000,
56
+ "JCPenney": 10000, "Sears": 12000, "Kohl's": 8000, "Dick's Sporting Goods": 4500,
57
+ "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
58
+ "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
59
+ "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
60
+ "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000
61
+ }
62
+
63
+ SEARCH_LIST = [
64
+ "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
65
+ "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "Ross Dress for Less", "Burlington",
66
+ "Dick's Sporting Goods", "Albertsons", "Safeway", "Home Depot", "Lowe's", "Best Buy",
67
+ "IKEA", "Bob's Discount Furniture", "Barnes & Noble", "Office Depot", "OfficeMax",
68
+ "Staples", "PetSmart", "Petco", "Kroger", "Meijer", "Costco", "Sam's Club",
69
+ "Whole Foods", "Trader Joe's", "Michaels", "Aldi", "Old Navy", "Ace Hardware"
70
+ ]
71
+
72
  # ==========================================
73
  # HELPER FUNCTIONS
74
  # ==========================================
 
92
  if target_kml:
93
  gdf = gpd.read_file(target_kml)
94
 
 
 
95
  def force_2d(geometry):
96
  if geometry.has_z:
97
  return transform(lambda x, y, z=None: (x, y), geometry)
 
105
 
106
  def get_roof_area(lat, lng, api_key):
107
  base_url = "https://solar.googleapis.com/v1/buildingInsights:findClosest"
108
+ params = {"location.latitude": lat, "location.longitude": lng, "requiredQuality": "HIGH", "key": api_key}
 
 
 
 
 
109
  try:
110
  resp = requests.get(base_url, params=params)
111
  data = resp.json()
 
137
  pass
138
  return None, None
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  # ==========================================
141
+ # MAIN LOGIC
142
  # ==========================================
143
  def process_data(file_obj):
144
  if not API_KEY:
145
  yield "❌ API Key not found! Set GOOGLE_API_KEY in Secrets.", None
146
  return
147
+
148
  if file_obj is None:
149
  yield "❌ Please upload a file.", None
150
  return
 
156
  yield "❌ Failed to read KML/KMZ file.", None
157
  return
158
 
159
+ # Check Area Limit
160
+ gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
161
+ gs_proj = gs.to_crs(epsg=6933)
162
+ area_sq_meters = gs_proj.area.iloc[0]
163
+ if area_sq_meters > 250_000_000:
164
+ yield f"⚠️ AREA TOO LARGE: {area_sq_meters:,.0f} sq m. (Limit: 250M).", None
165
+ return
 
 
 
 
 
166
 
167
  gmaps = googlemaps.Client(key=API_KEY)
168
  results = []
169
  seen_ids = set()
170
  total_brands = len(SEARCH_LIST)
171
 
172
+ # 1. SEARCH LOOP
173
  for i, brand in enumerate(SEARCH_LIST):
174
  yield f"πŸ” Scanning Brand {i+1}/{total_brands}: {brand}...", None
175
 
176
  try:
 
 
177
  places = gmaps.places_nearby(
178
  location=(polygon.centroid.y, polygon.centroid.x),
179
  radius=10000,
 
196
  if pid in seen_ids: continue
197
 
198
  name = p.get('name')
 
 
 
 
 
 
199
  name_clean = name.lower().replace("'", "").replace(".", "")
200
  brand_clean = brand.lower().replace("'", "").replace(".", "")
201
 
202
+ # A. UNIVERSAL NAME CHECK
203
  if brand_clean not in name_clean:
 
204
  if brand_clean == "tjx" and "t.j. maxx" in name_clean: pass
205
  elif brand_clean == "lowe" and "lowe's" in name_clean: pass
206
+ else: continue
207
+
208
+ # B. UNIVERSAL BAD WORD FILTER (Strict)
209
+ if any(term in name_clean for term in UNIVERSAL_BAD_TERMS): continue
210
+
 
 
211
  lat = p['geometry']['location']['lat']
212
  lng = p['geometry']['location']['lng']
213
 
214
+ # C. STRICT CONTAINMENT
 
215
  if not polygon.contains(Point(lng, lat)): continue
216
  seen_ids.add(pid)
217
 
218
+ # FETCH DATA
219
  roof_area = get_roof_area(lat, lng, API_KEY)
220
  height, floors = get_osm_physics(lat, lng)
221
 
222
+ # DATA FILLING
223
  source_note = "SolarAPI"
224
  if roof_area is None:
225
  roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
226
  source_note = "Brand_Avg (Missing)"
227
  else:
228
+ # Universal Mall Logic
229
  if roof_area > 30000 and brand not in ["IKEA", "Costco", "Meijer", "Sam's Club", "Walmart"]:
230
  roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
231
  source_note = "Brand_Avg (Mall detected)"
232
+ elif roof_area < 500 and brand not in ["Ace Hardware", "Trader Joe's"]:
233
  roof_area = BRAND_AVG_AREA.get(brand, 2500.0)
234
  source_note = "Brand_Avg (Too Small detected)"
235
 
 
237
  if height is None: height = floors * 6.0
238
 
239
  results.append({
240
+ 'Name': name,
241
+ 'Brand': brand,
242
+ 'Latitude': lat,
243
+ 'Longitude': lng,
244
+ 'Height_m': round(height, 2),
245
+ 'Num_Floors': int(floors),
246
+ 'Area_sqm': round(roof_area, 2),
247
+ 'Data_Source': source_note
248
  })
249
  except:
250
  pass
 
253
  yield "❌ No stores found in this area.", None
254
  return
255
 
256
+ # ==========================================
257
+ # 2. UNIVERSAL POST-PROCESSING (THE LOGIC UPGRADE)
258
+ # ==========================================
259
+ yield "🧹 Performing Universal Deduplication...", None
260
+
261
  df = pd.DataFrame(results)
262
+
263
+ # A. Remove Departments (e.g. Target Grocery, Meijer Deli)
264
+ df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
265
+
266
+ # B. Spatial Deduplication (Group by Brand + Location)
267
+ # Creates a grid ID approx 11 meters. If multiple of same brand exist, keep the one with shortest name.
268
+ df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
269
+
270
+ # Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
271
+ df['Name_Len'] = df['Name'].str.len()
272
+ df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
273
 
274
+ # Keep only the first entry per brand at that specific lat/long
275
+ df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
276
+ df = df.drop(columns=['Loc_ID', 'Name_Len'])
277
+
278
+ # C. Universal Strip Mall Splitter
279
+ # If different brands share the exact same roof area (down to decimal), split the area
280
+ df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
281
+
282
+ # Logic: If 3 tenants share 3000sqm, each gets 1000sqm
283
+ df['Final_Area_sqm'] = df.apply(
284
+ lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
285
+ axis=1
286
+ )
287
+
288
+ # Update Data Source note
289
+ df['Data_Source'] = df.apply(
290
+ lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
291
+ axis=1
292
+ )
293
+
294
+ # Clean Export
295
+ final_cols = ['Name', 'Brand', 'Latitude', 'Longitude', 'Height_m', 'Num_Floors', 'Final_Area_sqm', 'Data_Source']
296
+ df_final = df[final_cols].rename(columns={'Final_Area_sqm': 'Area_sqm'})
297
 
298
+ output_path = "Universal_Building_Inventory.csv"
299
+ df_final.to_csv(output_path, index=False)
300
 
301
+ yield f"βœ… Success! Found {len(df_final)} unique commercial assets.", output_path
302
 
303
  # ==========================================
304
  # GRADIO INTERFACE
305
  # ==========================================
306
  iface = gr.Interface(
307
  fn=process_data,
308
+ inputs=gr.File(label="Upload Polygon (KML/KMZ)"),
309
  outputs=[
310
  gr.Textbox(label="Status Log"),
311
  gr.File(label="Download CSV")
312
  ],
313
+ title="🌎 Universal Commercial Asset Scanner",
314
+ description="Upload any KML/KMZ. Scans for Big Box Retail. Applies universal filtering for medical/academic false positives and automatically splits strip-mall roof areas."
315
  )
316
 
317
  iface.launch()