forever-sheikh committed on
Commit
7a36d20
·
verified ·
1 Parent(s): d09b0df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -23
app.py CHANGED
@@ -21,9 +21,9 @@ except:
21
  API_KEY = None
22
 
23
  # ==========================================
24
- # 1. UNIVERSAL FILTER LISTS (THE FIX)
25
  # ==========================================
26
- # Filters out Schools, Doctors, Industrial services, etc.
27
  UNIVERSAL_BAD_TERMS = [
28
  "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
29
  "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
@@ -36,11 +36,44 @@ UNIVERSAL_BAD_TERMS = [
36
  "part", "accessories"
37
  ]
38
 
39
- # Filters out departments inside Big Box stores
40
  DEPARTMENT_TERMS = [
41
  "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
42
  "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
43
- "money", "bank", "cafe", "bistro", "snack", "food court", "customer service"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  ]
45
 
46
  BRAND_FLOORS = {
@@ -57,18 +90,10 @@ BRAND_AVG_AREA = {
57
  "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
58
  "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
59
  "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
60
- "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000
 
61
  }
62
 
63
- SEARCH_LIST = [
64
- "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
65
- "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "Ross Dress for Less", "Burlington",
66
- "Dick's Sporting Goods", "Albertsons", "Safeway", "Home Depot", "Lowe's", "Best Buy",
67
- "IKEA", "Bob's Discount Furniture", "Barnes & Noble", "Office Depot", "OfficeMax",
68
- "Staples", "PetSmart", "Petco", "Kroger", "Meijer", "Costco", "Sam's Club",
69
- "Whole Foods", "Trader Joe's", "Michaels", "Aldi", "Old Navy", "Ace Hardware"
70
- ]
71
-
72
  # ==========================================
73
  # HELPER FUNCTIONS
74
  # ==========================================
@@ -80,6 +105,7 @@ def load_geodata_to_polygon(file_obj):
80
 
81
  target_kml = None
82
  try:
 
83
  if file_obj.name.lower().endswith('.kmz'):
84
  with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
85
  zip_ref.extractall(extract_path)
@@ -92,6 +118,7 @@ def load_geodata_to_polygon(file_obj):
92
  if target_kml:
93
  gdf = gpd.read_file(target_kml)
94
 
 
95
  def force_2d(geometry):
96
  if geometry.has_z:
97
  return transform(lambda x, y, z=None: (x, y), geometry)
@@ -156,7 +183,7 @@ def process_data(file_obj):
156
  yield "❌ Failed to read KML/KMZ file.", None
157
  return
158
 
159
- # Check Area Limit
160
  gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
161
  gs_proj = gs.to_crs(epsg=6933)
162
  area_sq_meters = gs_proj.area.iloc[0]
@@ -254,38 +281,35 @@ def process_data(file_obj):
254
  return
255
 
256
  # ==========================================
257
- # 2. UNIVERSAL POST-PROCESSING (THE LOGIC UPGRADE)
258
  # ==========================================
259
  yield "🧹 Performing Universal Deduplication...", None
260
 
261
  df = pd.DataFrame(results)
262
 
263
- # A. Remove Departments (e.g. Target Grocery, Meijer Deli)
264
  df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
265
 
266
  # B. Spatial Deduplication (Group by Brand + Location)
267
- # Creates a grid ID approx 11 meters. If multiple of same brand exist, keep the one with shortest name.
268
  df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
269
 
270
  # Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
271
  df['Name_Len'] = df['Name'].str.len()
272
  df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
273
 
274
- # Keep only the first entry per brand at that specific lat/long
275
  df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
276
  df = df.drop(columns=['Loc_ID', 'Name_Len'])
277
 
278
  # C. Universal Strip Mall Splitter
279
- # If different brands share the exact same roof area (down to decimal), split the area
280
  df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
281
 
282
- # Logic: If 3 tenants share 3000sqm, each gets 1000sqm
283
  df['Final_Area_sqm'] = df.apply(
284
  lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
285
  axis=1
286
  )
287
 
288
- # Update Data Source note
289
  df['Data_Source'] = df.apply(
290
  lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
291
  axis=1
@@ -311,7 +335,7 @@ iface = gr.Interface(
311
  gr.File(label="Download CSV")
312
  ],
313
  title="🌎 Universal Commercial Asset Scanner",
314
- description="Upload any KML/KMZ. Scans for Big Box Retail. Applies universal filtering for medical/academic false positives and automatically splits strip-mall roof areas."
315
  )
316
 
317
  iface.launch()
 
21
  API_KEY = None
22
 
23
  # ==========================================
24
+ # 1. UNIVERSAL FILTER LISTS
25
  # ==========================================
26
+ # Filters out Schools, Doctors, Industrial services, etc. (Universal)
27
  UNIVERSAL_BAD_TERMS = [
28
  "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
29
  "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
 
36
  "part", "accessories"
37
  ]
38
 
39
+ # Filters out departments inside Big Box stores (Universal)
40
  DEPARTMENT_TERMS = [
41
  "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
42
  "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
43
+ "money", "bank", "cafe", "bistro", "snack", "food court", "customer service",
44
+ "floral", "flowers", "store on"
45
+ ]
46
+
47
+ # ==========================================
48
+ # 2. COMPREHENSIVE STORE LIST
49
+ # ==========================================
50
+ # Brand names to search for, grouped by retail category (e.g. Walmart, IKEA, Dick's, BJ's).
51
+ SEARCH_LIST = [
52
+ # Big Box / Department
53
+ "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
54
+ "Nordstrom", "Costco", "Sam's Club", "BJ's Wholesale Club",
55
+
56
+ # Clothing / Discount
57
+ "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "HomeSense",
58
+ "Ross Dress for Less", "Burlington", "Old Navy", "DSW Designer Shoe Warehouse",
59
+
60
+ # Home Improvement / Hardware
61
+ "Home Depot", "Lowe's", "Ace Hardware", "Menards",
62
+
63
+ # Electronics / Office
64
+ "Best Buy", "Office Depot", "OfficeMax", "Staples",
65
+
66
+ # Furniture
67
+ "IKEA", "Bob's Discount Furniture", "Raymour & Flanigan", "Ashley Furniture",
68
+
69
+ # Grocery
70
+ "Kroger", "Meijer", "Whole Foods", "Trader Joe's", "Aldi", "Lidl",
71
+ "Safeway", "Albertsons", "ShopRite", "Stop & Shop", "Publix", "Wegmans",
72
+
73
+ # Hobbies / Pets / Sporting
74
+ "Dick's Sporting Goods", "Bass Pro Shops", "Cabela's", "REI",
75
+ "Michaels", "Hobby Lobby", "Barnes & Noble",
76
+ "PetSmart", "Petco"
77
  ]
78
 
79
  BRAND_FLOORS = {
 
90
  "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
91
  "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
92
  "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
93
+ "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000,
94
+ "Hobby Lobby": 5000, "BJ's Wholesale Club": 10000
95
  }
96
 
 
 
 
 
 
 
 
 
 
97
  # ==========================================
98
  # HELPER FUNCTIONS
99
  # ==========================================
 
105
 
106
  target_kml = None
107
  try:
108
+ # HANDLING KML AND KMZ HERE
109
  if file_obj.name.lower().endswith('.kmz'):
110
  with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
111
  zip_ref.extractall(extract_path)
 
118
  if target_kml:
119
  gdf = gpd.read_file(target_kml)
120
 
121
+ # FORCE 2D FIX (Prevents crashes on 3D KMLs)
122
  def force_2d(geometry):
123
  if geometry.has_z:
124
  return transform(lambda x, y, z=None: (x, y), geometry)
 
183
  yield "❌ Failed to read KML/KMZ file.", None
184
  return
185
 
186
+ # --- CHECK AREA LIMIT HERE (250,000,000 sq m) ---
187
  gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
188
  gs_proj = gs.to_crs(epsg=6933)
189
  area_sq_meters = gs_proj.area.iloc[0]
 
281
  return
282
 
283
  # ==========================================
284
+ # 3. UNIVERSAL POST-PROCESSING
285
  # ==========================================
286
  yield "🧹 Performing Universal Deduplication...", None
287
 
288
  df = pd.DataFrame(results)
289
 
290
+ # A. Remove Departments (Target Grocery, Meijer Deli, Kroger Floral)
291
  df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
292
 
293
  # B. Spatial Deduplication (Group by Brand + Location)
294
+ # Creates a grid ID by rounding latitude/longitude to 4 decimal places (cells of roughly 11 meters).
295
  df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
296
 
297
  # Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
298
  df['Name_Len'] = df['Name'].str.len()
299
  df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
300
 
301
+ # Drop duplicates, keeping the shortest name
302
  df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
303
  df = df.drop(columns=['Loc_ID', 'Name_Len'])
304
 
305
  # C. Universal Strip Mall Splitter
 
306
  df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
307
 
 
308
  df['Final_Area_sqm'] = df.apply(
309
  lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
310
  axis=1
311
  )
312
 
 
313
  df['Data_Source'] = df.apply(
314
  lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
315
  axis=1
 
335
  gr.File(label="Download CSV")
336
  ],
337
  title="🌎 Universal Commercial Asset Scanner",
338
+ description="Upload any KML/KMZ. Scans for 50+ Big Box Brands. Includes auto-deduplication for departments (Floral, Pharmacy) and area splitting for strip malls."
339
  )
340
 
341
  iface.launch()