File size: 6,131 Bytes
711e16f
 
 
 
 
 
 
577ea9f
 
 
 
 
 
 
711e16f
 
 
577ea9f
bebb952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711e16f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577ea9f
711e16f
 
577ea9f
 
 
 
 
 
 
 
 
 
711e16f
 
577ea9f
 
 
 
 
 
 
 
 
 
 
 
 
bebb952
711e16f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bebb952
 
 
 
 
711e16f
577ea9f
711e16f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""Geocoding with Nominatim β†’ US Census fallback.

Nominatim works worldwide but its US residential coverage is patchy
(many real addresses simply aren't in OSM). The US Census Geocoder
has authoritative TIGER coverage of US addresses, so we fall back
to it whenever Nominatim returns nothing.
"""
import logging
from typing import Optional

import httpx

from app.config import USER_AGENT

# Nominatim free-form search endpoint; this is the geocoder tried first.
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# US Census Geocoder one-line-address endpoint ("geographies" variant),
# used as the fallback.
CENSUS_URL = (
    "https://geocoding.geo.census.gov"
    "/geocoder/geographies/onelineaddress"
)

# Values of Nominatim's `class` field that are treated as evidence of a
# non-residential building or site.
_COMMERCIAL_CLASSES = {
    "aeroway",
    "amenity",
    "healthcare",
    "industrial",
    "leisure",
    "military",
    "office",
    "railway",
    "shop",
    "tourism",
}

# Values of Nominatim's `type` field that are treated as commercial or
# institutional, grouped by theme for readability.
_COMMERCIAL_TYPES = {
    # Business premises
    "commercial", "industrial", "office", "retail", "warehouse",
    # Lodging
    "hotel", "motel",
    # Education
    "school", "kindergarten", "college", "university",
    # Health
    "hospital", "clinic", "pharmacy",
    # Places of worship
    "church", "cathedral", "mosque", "synagogue", "temple",
    # Transport hubs
    "airport", "aerodrome", "terminal", "train_station", "bus_station",
    # Government and public services
    "government", "public_building", "civic", "fire_station",
    "police", "courthouse", "embassy", "townhall",
    # Sport and culture
    "stadium", "sports_centre", "museum", "library", "attraction",
}

# Values of Nominatim's `type` field that confirm a residential hit. A match
# here short-circuits any ambiguous parent class.
_RESIDENTIAL_TYPES = {
    "apartments",
    "bungalow",
    "detached",
    "dormitory",
    "house",
    "residential",
    "semi_detached",
    "terrace",
}


def _classify_property_type(r: dict) -> str:
    """Label a Nominatim hit as ``'residential'`` or ``'commercial'``.

    Residential is the default: FlutIQ's audience is homeowners, so a hit is
    only diverted to commercial on a confident signal in the Nominatim
    metadata.

    Args:
        r: One Nominatim search result. Only its ``class``, ``type`` and
            ``name`` keys are read; missing or empty values are tolerated.

    Returns:
        ``'commercial'`` when the class or type is on a commercial list, or
        when a ``class=building`` hit carries a non-blank name; otherwise
        ``'residential'``. A type on the residential list always wins.
    """
    category = (r.get("class") or "").lower()
    kind = (r.get("type") or "").lower()
    label = (r.get("name") or "").strip()

    # An explicit residential type overrides any ambiguous parent class.
    if kind not in _RESIDENTIAL_TYPES:
        listed_commercial = (
            category in _COMMERCIAL_CLASSES or kind in _COMMERCIAL_TYPES
        )
        # A named `class=building` hit is almost always an institutional or
        # commercial building; pure street addresses come back without a
        # `name` populated.
        named_building = category == "building" and bool(label)
        if listed_commercial or named_building:
            return "commercial"
    return "residential"

# US 2-letter postal abbreviation -> full state name (50 states plus DC).
# Used to expand an abbreviated state so that "state" stays useful for
# downstream agents that compare on names like "Illinois".
_US_STATES = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "DC": "District of Columbia",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}


async def _try_nominatim(address: str) -> Optional[dict]:
    """Geocode ``address`` with Nominatim, returning ``None`` on any miss.

    Sends a single-result search request and maps the top hit into this
    module's geocoding result dict. Request failures and malformed
    responses are reported as ``None`` rather than raised, so that
    ``geocode_address`` can still try its fallback geocoder.

    Args:
        address: Free-form address string, sent as Nominatim's ``q``.

    Returns:
        A dict with ``lat``, ``lon``, ``display_name``, ``city``, ``state``,
        ``county``, ``property_type`` and ``source`` (``"nominatim"``), or
        ``None`` when the request fails, the status is not 200, the body is
        not a non-empty JSON list, or the top hit has no usable coordinates.
    """
    params = {
        "q": address,
        "format": "json",
        "limit": 1,
        "addressdetails": 1,
    }
    headers = {"User-Agent": USER_AGENT}

    try:
        async with httpx.AsyncClient(timeout=15, headers=headers) as client:
            resp = await client.get(NOMINATIM_URL, params=params)
        if resp.status_code != 200:
            return None
        results = resp.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Timeouts, connection errors and non-JSON bodies are treated as
        # "no result" instead of aborting the whole lookup.
        logging.getLogger(__name__).warning(
            "Nominatim lookup failed: %s", exc
        )
        return None

    # A successful search is a JSON list; anything else is not a hit.
    if not isinstance(results, list) or not results:
        return None

    r = results[0]
    if not isinstance(r, dict):
        return None
    try:
        lat = float(r["lat"])
        lon = float(r["lon"])
    except (KeyError, TypeError, ValueError):
        # Without coordinates the hit is useless to callers.
        return None

    # `address` may be absent or null; normalise to an empty mapping.
    addr = r.get("address") or {}
    return {
        "lat": lat,
        "lon": lon,
        "display_name": r.get("display_name", address),
        "city": addr.get("city") or addr.get("town") or addr.get("village") or "",
        "state": addr.get("state", ""),
        "county": addr.get("county", ""),
        "property_type": _classify_property_type(r),
        "source": "nominatim",
    }


async def _try_census(address: str) -> Optional[dict]:
    """Geocode ``address`` with the US Census Geocoder.

    Queries the one-line-address endpoint and maps the first address match
    into this module's geocoding result dict.

    Args:
        address: Free-form address string, sent as the ``address`` parameter.

    Returns:
        A dict with ``lat``, ``lon``, ``display_name``, ``city``, ``state``,
        ``county``, ``property_type`` and ``source`` (``"census"``), or
        ``None`` when the status is not 200, there are no address matches,
        or the first match has no usable coordinates.

    Raises:
        httpx.HTTPError: Transport errors are not caught here.
        ValueError: If a 200 response body is not valid JSON.
    """
    params = {
        "address": address,
        "benchmark": "Public_AR_Current",
        "vintage": "Current_Current",
        "format": "json",
    }

    async with httpx.AsyncClient(timeout=20) as client:
        resp = await client.get(CENSUS_URL, params=params)
    if resp.status_code != 200:
        return None

    # Tolerate a null or oddly-shaped payload instead of raising on `.get`.
    payload = resp.json()
    result = payload.get("result") if isinstance(payload, dict) else None
    matches = result.get("addressMatches") if isinstance(result, dict) else None
    if not matches:
        return None

    m = matches[0]
    coords = m.get("coordinates") or {}
    try:
        # Census reports x = longitude, y = latitude.
        lat = float(coords["y"])
        lon = float(coords["x"])
    except (KeyError, TypeError, ValueError):
        # A match without coordinates cannot be geocoded; report a miss
        # rather than crashing on float(None).
        return None

    components = m.get("addressComponents") or {}
    geos = m.get("geographies") or {}
    counties = geos.get("Counties") or []
    # Keep `county` a string even when the first entry has no NAME.
    county_name = (counties[0].get("NAME") or "") if counties else ""

    # Expand the abbreviation to a full name; unknown values pass through.
    state_abbrev = components.get("state") or ""
    state_full = _US_STATES.get(state_abbrev, state_abbrev)

    return {
        "lat": lat,
        "lon": lon,
        "display_name": m.get("matchedAddress", address),
        "city": (components.get("city") or "").title(),
        "state": state_full,
        "county": county_name,
        # Census Geocoder resolves TIGER/Line street addresses and doesn't
        # expose building-type metadata. Default to residential; the
        # commercial-buildings-with-names case is handled by Nominatim,
        # which we try first.
        "property_type": "residential",
        "source": "census",
    }


async def geocode_address(address: str) -> Optional[dict]:
    """Geocode ``address``, trying Nominatim first and the US Census second.

    Args:
        address: Free-form address string passed to both geocoders.

    Returns:
        The Nominatim result when it is truthy; otherwise whatever the
        Census lookup returns, which may be ``None``.
    """
    result = await _try_nominatim(address)
    if not result:
        # Nominatim produced nothing usable; fall back to the Census lookup.
        result = await _try_census(address)
    return result