Spaces:

forever-sheikh
/

Building-Scanner

Sleeping

App Files Files Community

forever-sheikh committed on Jan 6

Commit

7a36d20

verified ·

1 Parent(s): d09b0df

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -23

app.py CHANGED Viewed

@@ -21,9 +21,9 @@ except:
     API_KEY = None
 # ==========================================
-# 1. UNIVERSAL FILTER LISTS (THE FIX)
 # ==========================================
-# Filters out Schools, Doctors, Industrial services, etc.
 UNIVERSAL_BAD_TERMS = [
     "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
     "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
@@ -36,11 +36,44 @@ UNIVERSAL_BAD_TERMS = [
     "part", "accessories"
 ]
-# Filters out departments inside Big Box stores
 DEPARTMENT_TERMS = [
     "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
     "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
-    "money", "bank", "cafe", "bistro", "snack", "food court", "customer service"
 ]
 BRAND_FLOORS = {
@@ -57,18 +90,10 @@ BRAND_AVG_AREA = {
     "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
     "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
     "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
-    "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000
 }
-SEARCH_LIST = [
-    "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
-    "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "Ross Dress for Less", "Burlington",
-    "Dick's Sporting Goods", "Albertsons", "Safeway", "Home Depot", "Lowe's", "Best Buy",
-    "IKEA", "Bob's Discount Furniture", "Barnes & Noble", "Office Depot", "OfficeMax",
-    "Staples", "PetSmart", "Petco", "Kroger", "Meijer", "Costco", "Sam's Club",
-    "Whole Foods", "Trader Joe's", "Michaels", "Aldi", "Old Navy", "Ace Hardware"
-]
 # ==========================================
 # HELPER FUNCTIONS
 # ==========================================
@@ -80,6 +105,7 @@ def load_geodata_to_polygon(file_obj):
     target_kml = None
     try:
         if file_obj.name.lower().endswith('.kmz'):
             with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
                 zip_ref.extractall(extract_path)
@@ -92,6 +118,7 @@ def load_geodata_to_polygon(file_obj):
         if target_kml:
             gdf = gpd.read_file(target_kml)
             def force_2d(geometry):
                 if geometry.has_z:
                     return transform(lambda x, y, z=None: (x, y), geometry)
@@ -156,7 +183,7 @@ def process_data(file_obj):
         yield "❌ Failed to read KML/KMZ file.", None
         return
-    # Check Area Limit
     gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
     gs_proj = gs.to_crs(epsg=6933)
     area_sq_meters = gs_proj.area.iloc[0]
@@ -254,38 +281,35 @@ def process_data(file_obj):
         return
     # ==========================================
-    # 2. UNIVERSAL POST-PROCESSING (THE LOGIC UPGRADE)
     # ==========================================
     yield "🧹 Performing Universal Deduplication...", None
     df = pd.DataFrame(results)
-    # A. Remove Departments (e.g. Target Grocery, Meijer Deli)
     df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
     # B. Spatial Deduplication (Group by Brand + Location)
-    # Creates a grid ID approx 11 meters. If multiple of same brand exist, keep the one with shortest name.
     df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
     # Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
     df['Name_Len'] = df['Name'].str.len()
     df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
-    # Keep only the first entry per brand at that specific lat/long
     df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
     df = df.drop(columns=['Loc_ID', 'Name_Len'])
     # C. Universal Strip Mall Splitter
-    # If different brands share the exact same roof area (down to decimal), split the area
     df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
-    # Logic: If 3 tenants share 3000sqm, each gets 1000sqm
     df['Final_Area_sqm'] = df.apply(
         lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
         axis=1
     )
-    # Update Data Source note
     df['Data_Source'] = df.apply(
         lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
         axis=1
@@ -311,7 +335,7 @@ iface = gr.Interface(
         gr.File(label="Download CSV")
     ],
     title="🌎 Universal Commercial Asset Scanner",
-    description="Upload any KML/KMZ. Scans for Big Box Retail. Applies universal filtering for medical/academic false positives and automatically splits strip-mall roof areas."
 )
 iface.launch()

     API_KEY = None
 # ==========================================
+# 1. UNIVERSAL FILTER LISTS
 # ==========================================
+# Filters out Schools, Doctors, Industrial services, etc. (Universal)
 UNIVERSAL_BAD_TERMS = [
     "dr.", "dds", "md", "phd", "lcsw", "medical", "clinic", "health", "rehab",
     "therapy", "counseling", "chiropractor", "dental", "orthodontics", "hospital",
     "part", "accessories"
 ]
+# Filters out departments inside Big Box stores (Universal)
 DEPARTMENT_TERMS = [
     "grocery", "deli", "bakery", "pharmacy", "optical", "hearing", "photo",
     "portrait", "garden", "nursery", "mobile", "tech", "geek", "pickup",
+    "money", "bank", "cafe", "bistro", "snack", "food court", "customer service",
+    "floral", "flowers", "store on"
+]
+# ==========================================
+# 2. COMPREHENSIVE STORE LIST
+# ==========================================
+# Covers the requested brand variations (Walmart, IKEA, Dick's, BJ's, etc.)
+SEARCH_LIST = [
+    # Big Box / Department
+    "Walmart", "Target", "Kmart", "Sears", "Kohl's", "Macy's", "JCPenney",
+    "Nordstrom", "Costco", "Sam's Club", "BJ's Wholesale Club",
+    # Clothing / Discount
+    "TJX", "T.J. Maxx", "Marshalls", "HomeGoods", "HomeSense",
+    "Ross Dress for Less", "Burlington", "Old Navy", "DSW Designer Shoe Warehouse",
+    # Home Improvement / Hardware
+    "Home Depot", "Lowe's", "Ace Hardware", "Menards",
+    # Electronics / Office
+    "Best Buy", "Office Depot", "OfficeMax", "Staples",
+    # Furniture
+    "IKEA", "Bob's Discount Furniture", "Raymour & Flanigan", "Ashley Furniture",
+    # Grocery
+    "Kroger", "Meijer", "Whole Foods", "Trader Joe's", "Aldi", "Lidl",
+    "Safeway", "Albertsons", "ShopRite", "Stop & Shop", "Publix", "Wegmans",
+    # Hobbies / Pets / Sporting
+    "Dick's Sporting Goods", "Bass Pro Shops", "Cabela's", "REI",
+    "Michaels", "Hobby Lobby", "Barnes & Noble",
+    "PetSmart", "Petco"
 ]
 BRAND_FLOORS = {
     "T.J. Maxx": 2800, "Marshalls": 2800, "Ross Dress for Less": 2800,
     "Old Navy": 1400, "Barnes & Noble": 2500, "Best Buy": 3500, "Staples": 2000,
     "Office Depot": 2000, "PetSmart": 1800, "Petco": 1400, "Trader Joe's": 1200,
+    "Aldi": 1500, "Lidl": 1500, "Ace Hardware": 800, "DSW Designer Shoe Warehouse": 2000,
+    "Hobby Lobby": 5000, "BJ's Wholesale Club": 10000
 }
 # ==========================================
 # HELPER FUNCTIONS
 # ==========================================
     target_kml = None
     try:
+        # HANDLING KML AND KMZ HERE
         if file_obj.name.lower().endswith('.kmz'):
             with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
                 zip_ref.extractall(extract_path)
         if target_kml:
             gdf = gpd.read_file(target_kml)
+            # FORCE 2D FIX (Prevents crashes on 3D KMLs)
             def force_2d(geometry):
                 if geometry.has_z:
                     return transform(lambda x, y, z=None: (x, y), geometry)
         yield "❌ Failed to read KML/KMZ file.", None
         return
+    # --- CHECK AREA LIMIT HERE (250,000,000 sq m) ---
     gs = gpd.GeoSeries([polygon], crs="EPSG:4326")
     gs_proj = gs.to_crs(epsg=6933)
     area_sq_meters = gs_proj.area.iloc[0]
         return
     # ==========================================
+    # 2. UNIVERSAL POST-PROCESSING
     # ==========================================
     yield "🧹 Performing Universal Deduplication...", None
     df = pd.DataFrame(results)
+    # A. Remove Departments (Target Grocery, Meijer Deli, Kroger Floral)
     df = df[~df['Name'].str.contains('|'.join(DEPARTMENT_TERMS), case=False, na=False)]
     # B. Spatial Deduplication (Group by Brand + Location)
+    # Builds a grid ID by rounding lat/lon to 4 decimal places (cells of roughly 11 meters).
     df['Loc_ID'] = df['Latitude'].round(4).astype(str) + "_" + df['Longitude'].round(4).astype(str)
     # Sort by Name Length (Shortest name usually "Target", longest usually "Target Grocery ...")
     df['Name_Len'] = df['Name'].str.len()
     df = df.sort_values(by=['Brand', 'Loc_ID', 'Name_Len'])
+    # Drop duplicates, keeping the shortest name
     df = df.drop_duplicates(subset=['Brand', 'Loc_ID'], keep='first')
     df = df.drop(columns=['Loc_ID', 'Name_Len'])
     # C. Universal Strip Mall Splitter
     df['Tenant_Count'] = df.groupby('Area_sqm')['Area_sqm'].transform('count')
     df['Final_Area_sqm'] = df.apply(
         lambda x: x['Area_sqm'] / x['Tenant_Count'] if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Area_sqm'],
         axis=1
     )
     df['Data_Source'] = df.apply(
         lambda x: x['Data_Source'] + f" (Split w/ {x['Tenant_Count']-1} tenants)" if x['Tenant_Count'] > 1 and x['Area_sqm'] > 5000 else x['Data_Source'],
         axis=1
         gr.File(label="Download CSV")
     ],
     title="🌎 Universal Commercial Asset Scanner",
+    description="Upload any KML/KMZ. Scans for 50+ Big Box Brands. Includes auto-deduplication for departments (Floral, Pharmacy) and area splitting for strip malls."
 )
 iface.launch()