File size: 11,385 Bytes
dbaeeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
import requests
import json
import time
from typing import Dict, Optional, Tuple
from smolagents import Tool
from functools import lru_cache

class GeocodingTool(Tool):
    """
    Tool to convert addresses to latitude/longitude coordinates using free geocoding services.
    Enables other tools like subway proximity to work with address data.

    Lookups are sent to the OpenStreetMap Nominatim search endpoint, restricted
    to a New York City bounding box. Requests are spaced at least
    ``_rate_limit_delay`` seconds apart, and definitive answers are memoised
    per instance in a bounded least-recently-used cache.
    """

    name = "geocode_address"
    description = (
        "Converts a street address to latitude and longitude coordinates. "
        "Takes an address string and returns coordinates that can be used "
        "with other location-based tools like subway proximity finder."
    )

    inputs = {
        "address": {
            "type": "string",
            "description": "Street address to convert to coordinates (e.g., 'Nelson Ave near East 181st, Bronx, NY')"
        }
    }
    output_type = "string"

    def __init__(self):
        """Initialize the geocoding tool with rate limiting and an empty cache."""
        super().__init__()
        # -inf guarantees the very first request is never delayed, whatever
        # the monotonic clock's starting value is.
        self._last_request_time = float("-inf")
        self._rate_limit_delay = 1.0  # 1 second between requests to be respectful
        # Per-instance LRU cache: address -> (lat, lon) or None (definitive miss).
        # A plain dict is used; insertion order doubles as recency order.
        self._cache = {}
        self._cache_maxsize = 500
        # NYC bounding box; used both for the request viewbox and for
        # validating the coordinates that come back.
        self._min_lat, self._max_lat = 40.4, 40.9
        self._min_lon, self._max_lon = -74.3, -73.7
        self.is_initialized = True  # Add this attribute that smolagents might expect
        print("🌍 GeocodingTool initialized with rate limiting")

    def _lookup(self, address: str) -> Tuple[Optional[Tuple[float, float]], bool]:
        """
        Resolve an address through the cache, querying the service on a miss.

        Args:
            address: Address string used verbatim as the cache key.

        Returns:
            ``(coordinates, served_from_cache)`` where ``coordinates`` is a
            ``(latitude, longitude)`` tuple or None.

        Only definitive answers (a hit, or a clean "no usable result") are
        stored. Lookups that failed with an error are not cached, so a
        temporary outage does not make an address permanently unresolvable.
        """
        cache = self._cache
        if address in cache:
            # Re-insert to mark the entry as most recently used.
            coordinates = cache.pop(address)
            cache[address] = coordinates
            return coordinates, True

        coordinates, definitive = self._request_coordinates(address)
        if definitive:
            cache[address] = coordinates
            if len(cache) > self._cache_maxsize:
                # The first key in insertion order is the least recently used.
                del cache[next(iter(cache))]
        return coordinates, False

    def _cached_geocode(self, address: str) -> Optional[Tuple[float, float]]:
        """
        Cached geocoding function to avoid repeated API calls for same address.
        Stores up to 500 recent results per instance (least recently used
        entries are evicted first).
        """
        return self._lookup(address)[0]

    def _rate_limit(self):
        """Implement rate limiting to be respectful to free services."""
        # A monotonic clock is used so wall-clock adjustments cannot produce
        # a negative elapsed time and an oversized sleep.
        remaining = self._rate_limit_delay - (time.monotonic() - self._last_request_time)
        if remaining > 0:
            time.sleep(remaining)

        self._last_request_time = time.monotonic()

    def _request_coordinates(self, address: str) -> Tuple[Optional[Tuple[float, float]], bool]:
        """
        Query Nominatim once for ``address``.

        Returns:
            ``(coordinates, definitive)``. ``definitive`` is True when the
            service answered (with or without a usable result) and False when
            the request or the parsing of its response raised an error.
        """
        try:
            # Apply rate limiting
            self._rate_limit()

            # Nominatim API endpoint
            url = "https://nominatim.openstreetmap.org/search"

            # Parameters for better NYC results
            params = {
                "q": address,
                "format": "json",
                "addressdetails": 1,
                "limit": 1,
                "countrycodes": "us",
                "bounded": 1,
                # NYC bounding box, rendered as "-74.3,40.4,-73.7,40.9"
                "viewbox": f"{self._min_lon},{self._min_lat},{self._max_lon},{self._max_lat}",
            }

            headers = {
                "User-Agent": "VoucherBot-Geocoder/1.0 (Housing Search Application)"
            }

            response = requests.get(url, params=params, headers=headers, timeout=10)
            response.raise_for_status()

            results = response.json()
            if not results:
                print(f"❌ No geocoding results for: {address}")
                return None, True

            result = results[0]
            lat = float(result["lat"])
            lon = float(result["lon"])

            # Validate coordinates are in NYC area
            if self._min_lat <= lat <= self._max_lat and self._min_lon <= lon <= self._max_lon:
                return (lat, lon), True

            print(f"⚠️ Coordinates outside NYC: {lat}, {lon}")
            return None, True

        except (requests.RequestException, ValueError, LookupError, TypeError) as e:
            # Network/HTTP failures, undecodable JSON, or a response that does
            # not have the expected list-of-dicts shape.
            print(f"❌ Geocoding error for '{address}': {str(e)}")
            return None, False

    def _geocode_with_nominatim(self, address: str) -> Optional[Tuple[float, float]]:
        """
        Geocode address using OpenStreetMap Nominatim service (free).
        Returns (latitude, longitude) tuple or None if geocoding fails.
        """
        return self._request_coordinates(address)[0]

    def _format_output(
        self,
        address: str,
        coordinates: Optional[Tuple[float, float]],
        *,
        cached: Optional[bool] = None,
    ) -> Dict:
        """
        Format the geocoding output with comprehensive information.

        Args:
            address: Address the coordinates belong to.
            coordinates: ``(latitude, longitude)`` or None for a failure.
            cached: Whether the result was served from the cache. When
                omitted, reports whether ``address`` currently has a cache
                entry.

        Returns:
            A dict with ``status``, ``data`` and ``metadata`` keys (plus
            ``message`` on failure).
        """
        if coordinates is None:
            return {
                "status": "error",
                "message": f"Could not geocode address: {address}",
                "data": None,
                "metadata": {
                    "service": "OpenStreetMap Nominatim",
                    "timestamp": time.time()
                }
            }

        if cached is None:
            cached = address in self._cache

        lat, lon = coordinates
        return {
            "status": "success",
            "data": {
                "address": address,
                "latitude": lat,
                "longitude": lon,
                "coordinates": f"{lat},{lon}"
            },
            "metadata": {
                "service": "OpenStreetMap Nominatim",
                "timestamp": time.time(),
                "cached": cached
            }
        }

    def _smart_address_variants(self, address: str) -> list:
        """
        Generate smart address variants for fuzzy addresses like 'E 181st St near clinton ave'.
        Returns a list of address variants to try, ordered by likely success.

        The original address always comes first. Further variants are only
        produced when a borough followed by "NY" is found in the address.
        Duplicates are removed case-insensitively, keeping the first spelling.
        """
        import re

        variants = [address]  # Always try original first

        # Extract street info
        street_patterns = [
            # \b stops "E"/"W" from matching the tail of a preceding word
            # (e.g. "Grove 5th St" must not yield "e 5th St").
            r'(\bE\s+\d+(?:st|nd|rd|th)\s+St)',  # E 181st St
            r'(\bW\s+\d+(?:st|nd|rd|th)\s+St)',  # W 192nd St
            r'(\d+(?:st|nd|rd|th)\s+St)',        # 181st St
            r'([A-Za-z]+\s+Ave)',                # Grand Ave, Clinton Ave
            r'([A-Za-z]+\s+Avenue)',             # Grand Avenue
        ]

        # Extract borough; title-case it so the checks below also work for
        # input such as "bronx, ny".
        borough_match = re.search(r'(Bronx|Brooklyn|Manhattan|Queens|Staten Island),?\s*NY', address, re.IGNORECASE)
        borough = borough_match.group(1).title() if borough_match else ""

        # Find streets in the address
        found_streets = []
        for pattern in street_patterns:
            found_streets.extend(re.findall(pattern, address, re.IGNORECASE))

        # Create variants with different combinations
        if borough:
            for street in found_streets:
                # Try just the street with borough
                variants.append(f"{street}, {borough}, NY")

                # Try with zip codes for common areas
                if borough != "Bronx":
                    continue
                if "181" in street:
                    variants.extend([
                        f"{street}, {borough}, NY 10453",  # Common Bronx zip
                        f"{street}, {borough}, NY 10457",
                        f"{street}, {borough}, NY 10468"
                    ])
                elif "192" in street:
                    variants.extend([
                        f"{street}, {borough}, NY 10468",  # Kingsbridge area
                        f"{street}, {borough}, NY 10463"
                    ])

            # If it's a "near" address, try the main street. The split is
            # case-insensitive so "X Near Y" is handled like "X near Y".
            near_parts = re.split(r' near ', address, maxsplit=1, flags=re.IGNORECASE)
            if len(near_parts) > 1:
                main_part = near_parts[0].strip()
                if main_part:
                    variants.append(f"{main_part}, {borough}, NY")

        # Remove duplicates while preserving order
        unique_variants = {}
        for variant in variants:
            unique_variants.setdefault(variant.lower(), variant)

        return list(unique_variants.values())

    def forward(self, address: str) -> str:
        """
        Convert an address to latitude/longitude coordinates with smart fallback.

        Args:
            address: Street address to geocode

        Returns:
            JSON string with coordinates or error information. On success the
            metadata reports how many variants were actually tried and whether
            the winning lookup was served from the cache.
        """
        # Reject non-strings as well as empty / whitespace-only input before
        # any request is made.
        if not isinstance(address, str) or not address.strip():
            error_result = {
                "status": "error",
                "message": "Invalid address: must be a non-empty string",
                "data": None
            }
            return json.dumps(error_result, indent=2)

        # Clean up the address
        original_address = address.strip()

        print(f"🌍 Geocoding address: {original_address}")

        try:
            # Generate smart address variants
            address_variants = self._smart_address_variants(original_address)

            coordinates = None
            successful_variant = None
            served_from_cache = False
            attempts = 0

            # Try each variant until one works
            for attempts, variant in enumerate(address_variants, start=1):
                if attempts > 1:  # Don't print for the first (original) attempt
                    print(f"🔄 Trying variant: {variant}")

                coordinates, served_from_cache = self._lookup(variant)
                if coordinates is not None:
                    successful_variant = variant
                    break

            # Format and return result
            if coordinates is not None:
                lat, lon = coordinates
                result = {
                    "status": "success",
                    "data": {
                        "address": original_address,
                        "successful_variant": successful_variant,
                        "latitude": lat,
                        "longitude": lon,
                        "coordinates": f"{lat},{lon}"
                    },
                    "metadata": {
                        "service": "OpenStreetMap Nominatim",
                        "timestamp": time.time(),
                        "variants_tried": attempts,
                        "cached": served_from_cache
                    }
                }
                print(f"✅ Geocoded: {original_address} → ({lat}, {lon}) via '{successful_variant}'")
            else:
                result = {
                    "status": "error",
                    "message": f"Could not geocode address after trying {len(address_variants)} variants",
                    "data": {
                        "original_address": original_address,
                        "variants_tried": address_variants
                    },
                    "metadata": {
                        "service": "OpenStreetMap Nominatim",
                        "timestamp": time.time(),
                        "variants_tried": len(address_variants)
                    }
                }
                print(f"❌ Failed to geocode: {original_address} (tried {len(address_variants)} variants)")

            return json.dumps(result, indent=2)

        except Exception as e:
            # Tool boundary: always hand the caller a JSON error payload
            # instead of letting an unexpected exception escape.
            error_result = {
                "status": "error",
                "message": f"Geocoding error: {str(e)}",
                "data": None,
                "metadata": {
                    "timestamp": time.time(),
                    "address": original_address
                }
            }
            print(f"❌ Geocoding exception: {str(e)}")
            return json.dumps(error_result, indent=2)

# Create the tool instance.
# This runs at import time, so importing this module constructs a
# GeocodingTool (and prints its initialization message).
geocoding_tool = GeocodingTool()