Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,9 +21,9 @@ except:
|
|
| 21 |
API_KEY = None
|
| 22 |
|
| 23 |
# ==========================================
|
| 24 |
-
# 1. UNIVERSAL FILTER LISTS
|
| 25 |
# ==========================================
|
| 26 |
-
# Filters out Schools, Doctors, Industrial services, etc.
|
| 27 |
UNIVERSAL_BAD_TERMS = [
|
| 28 |
"dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
|
| 29 |
"therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
|
|
@@ -36,11 +36,44 @@ UNIVERSAL_BAD_TERMS = [
|
|
| 36 |
"part", "accessories"
|
| 37 |
]
|
| 38 |
|
| 39 |
-
# Filters out departments inside Big Box stores
|
| 40 |
DEPARTMENT_TERMS = [
|
| 41 |
"grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
|
| 42 |
"portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
|
| 43 |
-
"money", "bank", "cafe", "bistro", "snack", "food court", "customer service"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
]
|
| 45 |
|
| 46 |
BRAND_FLOORS = {
|
|
@@ -57,18 +90,10 @@ BRAND_AVG_AREA = {
|
|
| 57 |
"T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
|
| 58 |
"Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
|
| 59 |
"Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
|
| 60 |
-
"Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000
|
|
|
|
| 61 |
}
|
| 62 |
|
| 63 |
-
SEARCH_LIST = [
|
| 64 |
-
"Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
|
| 65 |
-
"TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "Ross Dress for Less", "Burlington",
|
| 66 |
-
"Dick's Sporting Goods", "Albertsons", "Safeway", "Home Depot", "Lowe's", "Best Buy",
|
| 67 |
-
"IKEA", "Bob's Discount Furniture", "Barnes & Noble", "Office Depot", "OfficeMax",
|
| 68 |
-
"Staples", "PetSmart", "Petco", "Kroger", "Meijer", "Costco", "Sam's Club",
|
| 69 |
-
"Whole Foods", "Trader Joe's", "Michaels", "Aldi", "Old Navy", "Ace Hardware"
|
| 70 |
-
]
|
| 71 |
-
|
| 72 |
# ==========================================
|
| 73 |
# HELPER FUNCTIONS
|
| 74 |
# ==========================================
|
|
@@ -80,6 +105,7 @@ def load_geodata_to_polygon(file_obj):
|
|
| 80 |
|
| 81 |
target_kml = None
|
| 82 |
try:
|
|
|
|
| 83 |
if file_obj.name.lower().endswith('.kmz'):
|
| 84 |
with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
|
| 85 |
zip_ref.extractall(extract_path)
|
|
@@ -92,6 +118,7 @@ def load_geodata_to_polygon(file_obj):
|
|
| 92 |
if target_kml:
|
| 93 |
gdf = gpd.read_file(target_kml)
|
| 94 |
|
|
|
|
| 95 |
def force_2d(geometry):
|
| 96 |
if geometry.has_z:
|
| 97 |
return transform(lambda x, y, z=None: (x, y), geometry)
|
|
@@ -156,7 +183,7 @@ def process_data(file_obj):
|
|
| 156 |
yield "❌ Failed to read KML/KMZ file.", None
|
| 157 |
return
|
| 158 |
|
| 159 |
-
#
|
| 160 |
gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
|
| 161 |
gs_proj = gs.to_crs(epsg=6933)
|
| 162 |
area_sq_meters = gs_proj.area.iloc[0]
|
|
@@ -254,38 +281,35 @@ def process_data(file_obj):
|
|
| 254 |
return
|
| 255 |
|
| 256 |
# ==========================================
|
| 257 |
-
# 2. UNIVERSAL POST-PROCESSING
|
| 258 |
# ==========================================
|
| 259 |
yield "🧹 Performing Universal Deduplication...", None
|
| 260 |
|
| 261 |
df = pd.DataFrame(results)
|
| 262 |
|
| 263 |
-
# A. Remove Departments (
|
| 264 |
df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
|
| 265 |
|
| 266 |
# B. Spatial Deduplication (Group by Brand + Location)
|
| 267 |
-
# Creates a grid ID approx 11 meters.
|
| 268 |
df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
|
| 269 |
|
| 270 |
# Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
|
| 271 |
df['Name_Len'] = df['Name'].str.len()
|
| 272 |
df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
|
| 273 |
|
| 274 |
-
#
|
| 275 |
df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
|
| 276 |
df = df.drop(columns=['Loc_ID', 'Name_Len'])
|
| 277 |
|
| 278 |
# C. Universal Strip Mall Splitter
|
| 279 |
-
# If different brands share the exact same roof area (down to decimal), split the area
|
| 280 |
df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
|
| 281 |
|
| 282 |
-
# Logic: If 3 tenants share 3000sqm, each gets 1000sqm
|
| 283 |
df['Final_Area_sqm'] = df.apply(
|
| 284 |
lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
|
| 285 |
axis=1
|
| 286 |
)
|
| 287 |
|
| 288 |
-
# Update Data Source note
|
| 289 |
df['Data_Source'] = df.apply(
|
| 290 |
lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
|
| 291 |
axis=1
|
|
@@ -311,7 +335,7 @@ iface = gr.Interface(
|
|
| 311 |
gr.File(label="Download CSV")
|
| 312 |
],
|
| 313 |
title="🌎 Universal Commercial Asset Scanner",
|
| 314 |
-
description="Upload any KML/KMZ. Scans for Big Box
|
| 315 |
)
|
| 316 |
|
| 317 |
iface.launch()
|
|
|
|
| 21 |
API_KEY = None
|
| 22 |
|
| 23 |
# ==========================================
|
| 24 |
+
# 1. UNIVERSAL FILTER LISTS
|
| 25 |
# ==========================================
|
| 26 |
+
# Filters out Schools, Doctors, Industrial services, etc. (Universal)
|
| 27 |
UNIVERSAL_BAD_TERMS = [
|
| 28 |
"dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
|
| 29 |
"therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
|
|
|
|
| 36 |
"part", "accessories"
|
| 37 |
]
|
| 38 |
|
| 39 |
+
# Filters out departments inside Big Box stores (Universal)
|
| 40 |
DEPARTMENT_TERMS = [
|
| 41 |
"grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
|
| 42 |
"portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
|
| 43 |
+
"money", "bank", "cafe", "bistro", "snack", "food court", "customer service",
|
| 44 |
+
"floral", "flowers", "store on"
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
# ==========================================
|
| 48 |
+
# 2. COMPREHENSIVE STORE LIST
|
| 49 |
+
# ==========================================
|
| 50 |
+
# Covers the requested brand variations (Walmart, IKEA, Dick's, BJ's, etc.)
|
| 51 |
+
SEARCH_LIST = [
|
| 52 |
+
# Big Box / Department
|
| 53 |
+
"Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
|
| 54 |
+
"Nordstrom", "Costco", "Sam's Club", "BJ's Wholesale Club",
|
| 55 |
+
|
| 56 |
+
# Clothing / Discount
|
| 57 |
+
"TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "HomeSense",
|
| 58 |
+
"Ross Dress for Less", "Burlington", "Old Navy", "DSW Designer Shoe Warehouse",
|
| 59 |
+
|
| 60 |
+
# Home Improvement / Hardware
|
| 61 |
+
"Home Depot", "Lowe's", "Ace Hardware", "Menards",
|
| 62 |
+
|
| 63 |
+
# Electronics / Office
|
| 64 |
+
"Best Buy", "Office Depot", "OfficeMax", "Staples",
|
| 65 |
+
|
| 66 |
+
# Furniture
|
| 67 |
+
"IKEA", "Bob's Discount Furniture", "Raymour & Flanigan", "Ashley Furniture",
|
| 68 |
+
|
| 69 |
+
# Grocery
|
| 70 |
+
"Kroger", "Meijer", "Whole Foods", "Trader Joe's", "Aldi", "Lidl",
|
| 71 |
+
"Safeway", "Albertsons", "ShopRite", "Stop & Shop", "Publix", "Wegmans",
|
| 72 |
+
|
| 73 |
+
# Hobbies / Pets / Sporting
|
| 74 |
+
"Dick's Sporting Goods", "Bass Pro Shops", "Cabela's", "REI",
|
| 75 |
+
"Michaels", "Hobby Lobby", "Barnes & Noble",
|
| 76 |
+
"PetSmart", "Petco"
|
| 77 |
]
|
| 78 |
|
| 79 |
BRAND_FLOORS = {
|
|
|
|
| 90 |
"T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
|
| 91 |
"Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
|
| 92 |
"Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
|
| 93 |
+
"Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000,
|
| 94 |
+
"Hobby Lobby": 5000, "BJ's Wholesale Club": 10000
|
| 95 |
}
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# ==========================================
|
| 98 |
# HELPER FUNCTIONS
|
| 99 |
# ==========================================
|
|
|
|
| 105 |
|
| 106 |
target_kml = None
|
| 107 |
try:
|
| 108 |
+
# HANDLING KML AND KMZ HERE
|
| 109 |
if file_obj.name.lower().endswith('.kmz'):
|
| 110 |
with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
|
| 111 |
zip_ref.extractall(extract_path)
|
|
|
|
| 118 |
if target_kml:
|
| 119 |
gdf = gpd.read_file(target_kml)
|
| 120 |
|
| 121 |
+
# FORCE 2D FIX (Prevents crashes on 3D KMLs)
|
| 122 |
def force_2d(geometry):
|
| 123 |
if geometry.has_z:
|
| 124 |
return transform(lambda x, y, z=None: (x, y), geometry)
|
|
|
|
| 183 |
yield "❌ Failed to read KML/KMZ file.", None
|
| 184 |
return
|
| 185 |
|
| 186 |
+
# --- CHECK AREA LIMIT HERE (250,000,000 sq m) ---
|
| 187 |
gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
|
| 188 |
gs_proj = gs.to_crs(epsg=6933)
|
| 189 |
area_sq_meters = gs_proj.area.iloc[0]
|
|
|
|
| 281 |
return
|
| 282 |
|
| 283 |
# ==========================================
|
| 284 |
+
# 3. UNIVERSAL POST-PROCESSING
|
| 285 |
# ==========================================
|
| 286 |
yield "🧹 Performing Universal Deduplication...", None
|
| 287 |
|
| 288 |
df = pd.DataFrame(results)
|
| 289 |
|
| 290 |
+
# A. Remove Departments (Target Grocery, Meijer Deli, Kroger Floral)
|
| 291 |
df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
|
| 292 |
|
| 293 |
# B. Spatial Deduplication (Group by Brand + Location)
|
| 294 |
+
# Creates a grid ID with cells of approx. 11 meters (coordinates rounded to 4 decimal places).
|
| 295 |
df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
|
| 296 |
|
| 297 |
# Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
|
| 298 |
df['Name_Len'] = df['Name'].str.len()
|
| 299 |
df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
|
| 300 |
|
| 301 |
+
# Drop duplicates, keeping the shortest name
|
| 302 |
df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
|
| 303 |
df = df.drop(columns=['Loc_ID', 'Name_Len'])
|
| 304 |
|
| 305 |
# C. Universal Strip Mall Splitter
|
|
|
|
| 306 |
df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
|
| 307 |
|
|
|
|
| 308 |
df['Final_Area_sqm'] = df.apply(
|
| 309 |
lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
|
| 310 |
axis=1
|
| 311 |
)
|
| 312 |
|
|
|
|
| 313 |
df['Data_Source'] = df.apply(
|
| 314 |
lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
|
| 315 |
axis=1
|
|
|
|
| 335 |
gr.File(label="Download CSV")
|
| 336 |
],
|
| 337 |
title="🌎 Universal Commercial Asset Scanner",
|
| 338 |
+
description="Upload any KML/KMZ. Scans for 50+ Big Box Brands. Includes auto-deduplication for departments (Floral, Pharmacy) and area splitting for strip malls."
|
| 339 |
)
|
| 340 |
|
| 341 |
iface.launch()
|