FlutIQ / app /tools /geocoder.py
kredd25's picture
v0.15.2: detect commercial properties, skip homeowner advice
bebb952
"""Geocoding with Nominatim β†’ US Census fallback.
Nominatim works worldwide but its US residential coverage is patchy
(many real addresses simply aren't in OSM). The US Census Geocoder
has authoritative TIGER coverage of US addresses, so we fall back
to it whenever Nominatim returns nothing.
"""
from typing import Optional
import httpx
from app.config import USER_AGENT
# Nominatim free-text search endpoint (queried first).
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"

# US Census Geocoder one-line-address endpoint. The "geographies" variant is
# used because the county name is read from the returned geography layers.
CENSUS_URL = "https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress"
# Nominatim `class` values that imply a non-residential building/site.
_COMMERCIAL_CLASSES = {
    "amenity",
    "tourism",
    "shop",
    "office",
    "industrial",
    "leisure",
    "healthcare",
    "aeroway",
    "railway",
    "military",
}

# Nominatim `type` values that imply commercial / institutional use.
_COMMERCIAL_TYPES = {
    # business / lodging / education
    "commercial", "industrial", "office", "retail", "warehouse",
    "hotel", "motel",
    "school", "kindergarten", "college", "university",
    # health care
    "hospital", "clinic", "pharmacy",
    # places of worship
    "church", "cathedral", "mosque", "synagogue", "temple",
    # transport
    "airport", "aerodrome", "terminal", "train_station", "bus_station",
    # government / civic
    "government", "public_building", "civic", "fire_station",
    "police", "courthouse", "embassy", "townhall",
    # culture / sport
    "stadium", "sports_centre", "museum", "library", "attraction",
}

# Nominatim `type` values that confirm residential use; these win over any
# ambiguous parent `class`.
_RESIDENTIAL_TYPES = {
    "house",
    "apartments",
    "residential",
    "detached",
    "semi_detached",
    "terrace",
    "bungalow",
    "dormitory",
}


def _classify_property_type(r: dict) -> str:
    """Classify a Nominatim search hit as 'residential' or 'commercial'.

    The answer is 'residential' unless the hit's metadata clearly says
    otherwise: FlutIQ's audience is homeowners, so only a confident
    commercial signal diverts a result.

    Args:
        r: One Nominatim result; only its ``class``, ``type`` and ``name``
            entries are read. Missing or empty values are treated as "".

    Returns:
        ``"residential"`` or ``"commercial"``.
    """
    category, kind = ((r.get(key) or "").lower() for key in ("class", "type"))
    label = (r.get("name") or "").strip()

    # An explicitly residential type settles the question regardless of class.
    if kind not in _RESIDENTIAL_TYPES:
        if category in _COMMERCIAL_CLASSES or kind in _COMMERCIAL_TYPES:
            return "commercial"
        # A `class=building` hit carrying an OSM building name is almost
        # always institutional/commercial; plain street addresses come back
        # with no `name` populated.
        if label and category == "building":
            return "commercial"
    return "residential"
# 2-letter postal abbreviation → full state name (50 states plus DC). Keeps
# the "state" field useful for downstream agents that compare on names like
# "Illinois".
_US_STATES = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "DC": "District of Columbia",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}
async def _try_nominatim(address: str) -> Optional[dict]:
    """Geocode ``address`` with Nominatim, best-effort.

    Args:
        address: Free-text address passed as the ``q`` search parameter.

    Returns:
        A dict with keys ``lat``, ``lon``, ``display_name``, ``city``,
        ``state``, ``county``, ``property_type`` and ``source``
        (always ``"nominatim"``), built from the first search hit; or
        ``None`` when there is no usable hit.

    Any failure to obtain a usable hit (non-200 status, transport error or
    timeout, unparseable body, empty or malformed result) yields ``None``
    rather than an exception, so a caller can move on to another geocoder.
    """
    params = {
        "q": address,
        "format": "json",
        "limit": 1,
        "addressdetails": 1,
    }
    headers = {"User-Agent": USER_AGENT}
    try:
        async with httpx.AsyncClient(timeout=15, headers=headers) as client:
            resp = await client.get(NOMINATIM_URL, params=params)
            if resp.status_code != 200:
                return None
            results = resp.json()
    except (httpx.HTTPError, ValueError):
        # httpx.HTTPError covers connection failures and timeouts;
        # ValueError covers a body that is not valid JSON. Both are treated
        # like the non-200 case above: "no result from this service".
        return None

    # A successful search returns a JSON list; anything else is unusable.
    if not isinstance(results, list) or not results:
        return None
    r = results[0]

    try:
        lat = float(r["lat"])
        lon = float(r["lon"])
    except (KeyError, TypeError, ValueError):
        # A hit without parseable coordinates is useless to callers.
        return None

    addr = r.get("address") or {}
    return {
        "lat": lat,
        "lon": lon,
        "display_name": r.get("display_name", address),
        # The locality is reported under whichever key fits its size.
        "city": addr.get("city") or addr.get("town") or addr.get("village") or "",
        "state": addr.get("state") or "",
        "county": addr.get("county") or "",
        "property_type": _classify_property_type(r),
        "source": "nominatim",
    }
async def _try_census(address: str) -> Optional[dict]:
    """Geocode ``address`` with the US Census Geocoder, best-effort.

    Args:
        address: One-line address passed as the ``address`` parameter.

    Returns:
        A dict with keys ``lat``, ``lon``, ``display_name``, ``city``,
        ``state``, ``county``, ``property_type`` and ``source``
        (always ``"census"``), built from the first address match; or
        ``None`` when there is no usable match.

    Any failure to obtain a usable match (non-200 status, transport error
    or timeout, unparseable body, no matches, missing coordinates) yields
    ``None`` rather than an exception.
    """
    params = {
        "address": address,
        "benchmark": "Public_AR_Current",
        "vintage": "Current_Current",
        "format": "json",
    }
    try:
        async with httpx.AsyncClient(timeout=20) as client:
            resp = await client.get(CENSUS_URL, params=params)
            if resp.status_code != 200:
                return None
            payload = resp.json()
    except (httpx.HTTPError, ValueError):
        # Connection failure, timeout, or non-JSON body: treat it the same
        # way as a non-200 response.
        return None

    if not isinstance(payload, dict):
        return None
    # `or {}` / `or []` also cover keys that are present but null.
    matches = (payload.get("result") or {}).get("addressMatches") or []
    if not matches:
        return None
    m = matches[0]

    coords = m.get("coordinates") or {}
    try:
        # The response carries longitude as `x` and latitude as `y`.
        lat = float(coords["y"])
        lon = float(coords["x"])
    except (KeyError, TypeError, ValueError):
        # A match without parseable coordinates is useless to callers.
        return None

    components = m.get("addressComponents") or {}
    geos = m.get("geographies") or {}
    counties = geos.get("Counties") or []
    county_name = (counties[0].get("NAME") or "") if counties else ""

    # Expand the 2-letter code to a full name; unknown codes pass through.
    state_abbrev = components.get("state") or ""
    state_full = _US_STATES.get(state_abbrev, state_abbrev)

    return {
        "lat": lat,
        "lon": lon,
        "display_name": m.get("matchedAddress", address),
        "city": (components.get("city") or "").title(),
        "state": state_full,
        "county": county_name,
        # Census Geocoder resolves TIGER/Line street addresses and doesn't
        # expose building-type metadata. Default to residential — the
        # commercial-buildings-with-names case is handled by Nominatim,
        # which is tried first.
        "property_type": "residential",
        "source": "census",
    }
async def geocode_address(address: str) -> Optional[dict]:
    """Resolve ``address`` to coordinates and locality metadata.

    Nominatim is tried first; the US Census Geocoder is consulted only when
    Nominatim yields nothing.

    Args:
        address: Free-text street address.

    Returns:
        The result dict from whichever service produced a match, or
        ``None`` when neither did or when ``address`` is empty/blank.
    """
    # Nothing to look up: skip two pointless network round-trips.
    if not address or not address.strip():
        return None

    nom = await _try_nominatim(address)
    if nom:
        return nom
    return await _try_census(address)