Franko Fišter committed on
Commit
a31ad35
·
1 Parent(s): 028bcd8

Added geocoding repository and endpoints

Browse files
api/geocoding_routes.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
2
+ from typing import Optional
3
+ import logging
4
+ from db.geocoding_repository import GeocodingRepository
5
+ from config.settings import GEOAPIFY_API_KEY
6
+
7
# Router for the geocoding endpoints: every route declared on it is served
# under the "/geocoding" prefix and tagged "Geocoding".
router = APIRouter(prefix="/geocoding", tags=["Geocoding"])

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
10
+
11
# Module-level state describing the batch geocoding job.
#   running     - True while a batch run is in progress
#   last_run    - outcome label of the most recent run; None until a run has finished
#   last_result - summary (or error payload) of the most recent run; None until then
geocoding_status = dict(running=False, last_run=None, last_result=None)
17
+
18
async def run_geocoding_task(api_key: Optional[str] = None):
    """Background task: batch-geocode stores and record the outcome.

    Progress is published through the module-level ``geocoding_status`` dict:
    ``running`` is True for the duration of the call, ``last_result`` holds
    the summary returned by ``GeocodingRepository.batch_geocode_stores`` (or
    ``{"error": ...}`` when an exception escaped), and ``last_run`` is set to
    ``"completed"`` or ``"failed"``.

    Args:
        api_key: Geoapify API key forwarded to the repository unchanged
            (``None`` included).

    Errors derived from ``Exception`` are logged and recorded in
    ``geocoding_status``; they are not re-raised.
    """
    # The dict is mutated in place and never rebound, so no ``global``
    # statement is required.
    geocoding_status["running"] = True
    geocoding_status["last_result"] = None

    try:
        async with GeocodingRepository() as repo:
            result = await repo.batch_geocode_stores(api_key=api_key)
    except Exception as e:
        # logger.exception records the traceback as well as the message.
        logger.exception(f"Error in geocoding task: {e}")
        geocoding_status["last_result"] = {"error": str(e)}
        geocoding_status["last_run"] = "failed"
    else:
        geocoding_status["last_result"] = result
        # The repository signals an aborted run (e.g. missing API key) with an
        # "error" entry in its summary instead of raising; do not report such
        # a run as completed.
        aborted = isinstance(result, dict) and bool(result.get("error"))
        geocoding_status["last_run"] = "failed" if aborted else "completed"
    finally:
        geocoding_status["running"] = False
38
+
39
@router.post("/start")
async def start_geocoding(
    background_tasks: BackgroundTasks,
    api_key: Optional[str] = Query(None, description="Geoapify API key (optional - will use settings if not provided)")
):
    """
    Start the geocoding process for all stores without coordinates using Geoapify.

    - **api_key**: Geoapify API key (optional - will use environment variable if not provided)
    """
    # Returns a confirmation payload immediately; the work itself happens in
    # run_geocoding_task after the response has been handed off.
    # Raises HTTP 400 when a run is already active or when no API key is
    # available from either the query parameter or the settings.
    if geocoding_status["running"]:
        raise HTTPException(
            status_code=400,
            detail="Geocoding process is already running"
        )

    # Use provided API key or fallback to settings
    final_api_key = api_key or GEOAPIFY_API_KEY

    if not final_api_key:
        raise HTTPException(
            status_code=400,
            detail="Geoapify API key is required. Provide it as parameter or set GEOAPIFY_API_KEY environment variable."
        )

    # Claim the "running" slot here rather than waiting for the background
    # task to do it: otherwise a second /start request that arrives before the
    # task begins passes the guard above, and /status reports running=False
    # even though this response says "running". The task sets the flag again
    # and clears it when it finishes.
    geocoding_status["running"] = True

    # Start background task
    background_tasks.add_task(run_geocoding_task, final_api_key)

    return {
        "message": "Geocoding process started",
        "status": "running",
        "service": "Geoapify"
    }
72
+
73
@router.get("/status")
async def get_geocoding_status():
    """Get the current status of the geocoding process"""
    # Return a fresh dict holding only the three published fields, in the same
    # order, rather than the shared module-level dict itself.
    published_fields = ("running", "last_run", "last_result")
    return {field: geocoding_status[field] for field in published_fields}
81
+
82
@router.get("/stores-without-coordinates")
async def get_stores_without_coordinates():
    """Get count of stores that still need geocoding"""
    # Only a short sample of the stores is returned next to the full count.
    sample_size = 10
    try:
        async with GeocodingRepository() as repo:
            pending = repo.get_stores_without_coordinates()
            payload = {
                "count": len(pending),
                "stores": pending[:sample_size]
            }
        return payload
    except Exception as exc:
        # Any failure (including one raised while closing the repository) is
        # logged and surfaced to the client as HTTP 500.
        logger.error(f"Error getting stores without coordinates: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
95
+
96
@router.post("/geocode-single/{store_id}")
async def geocode_single_store(
    store_id: str,
    api_key: Optional[str] = Query(None, description="Geoapify API key")
):
    """Geocode a single store by ID"""
    # Flow: resolve API key -> load store row -> geocode its address ->
    # persist coordinates. Each step that cannot proceed raises the matching
    # HTTPException; anything unexpected is logged and reported as HTTP 500.
    try:
        # Query parameter wins; otherwise fall back to the configured key.
        resolved_key = api_key or GEOAPIFY_API_KEY
        if not resolved_key:
            raise HTTPException(
                status_code=400,
                detail="Geoapify API key is required"
            )

        async with GeocodingRepository() as repo:
            # Load the store row for this ID.
            lookup = (
                repo.supabase.table('stores')
                .select('*')
                .eq('store_id', store_id)
                .execute()
            )
            rows = lookup.data
            if not rows:
                raise HTTPException(status_code=404, detail="Store not found")

            address = rows[0].get('store_address', '')
            if not address:
                raise HTTPException(status_code=400, detail="Store has no address")

            # Geocode the address using Geoapify.
            coordinates = await repo.geocode_address_geoapify(address, store_id, resolved_key)
            if not coordinates:
                raise HTTPException(status_code=400, detail="Failed to geocode address")

            lat, lng = coordinates
            if not repo.update_store_coordinates(store_id, lat, lng):
                raise HTTPException(status_code=500, detail="Failed to update coordinates")

            return {
                "message": "Store geocoded successfully",
                "store_id": store_id,
                "coordinates": {"lat": lat, "lng": lng}
            }

    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as exc:
        logger.error(f"Error geocoding single store: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
api/main.py CHANGED
@@ -5,6 +5,7 @@ from api.product_routes import router as product_router
5
  from api.receipt_routes import router as receipt_router
6
  from api.scrape_routes import router as scrape_router
7
  from api.cijene_routes import router as cijene_router
 
8
 
9
  # Initialize FastAPI
10
  app = FastAPI(title="SupaKuna API")
@@ -24,6 +25,7 @@ app.include_router(product_router)
24
  app.include_router(receipt_router)
25
  app.include_router(scrape_router)
26
  app.include_router(cijene_router)
 
27
 
28
  @app.get("/", tags=["Health"])
29
  def health_check():
 
5
  from api.receipt_routes import router as receipt_router
6
  from api.scrape_routes import router as scrape_router
7
  from api.cijene_routes import router as cijene_router
8
+ from api.geocoding_routes import router as geocoding_router
9
 
10
  # Initialize FastAPI
11
  app = FastAPI(title="SupaKuna API")
 
25
  app.include_router(receipt_router)
26
  app.include_router(scrape_router)
27
  app.include_router(cijene_router)
28
+ app.include_router(geocoding_router)
29
 
30
  @app.get("/", tags=["Health"])
31
  def health_check():
config/settings.py CHANGED
@@ -42,4 +42,7 @@ API_PORT = 7860
42
  MAX_RECEIPTS_PER_HOUR = 5
43
 
44
  # Google OCR config
45
- GOOGLE_VISION_KEY_PATH = "receipt-vision-key.json"
 
 
 
 
42
  MAX_RECEIPTS_PER_HOUR = 5
43
 
44
  # Google OCR config
45
+ GOOGLE_VISION_KEY_PATH = "receipt-vision-key.json"
46
+
47
+ # Geoapify config
48
+ GEOAPIFY_API_KEY = os.getenv("GEOAPIFY_API_KEY")
db/geocoding_repository.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ from typing import List, Dict, Optional, Tuple
4
+ import httpx
5
+ from db.supabase_client import SupabaseClient
6
+ from config.settings import GEOAPIFY_API_KEY
7
+
8
# NOTE(review): basicConfig runs at import time, so importing this module
# configures the root logger at INFO level if it has no handlers yet. Confirm
# this is intended rather than configuring logging in the application entry point.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
10
+
11
class GeocodingRepository:
    """Geocode store addresses and persist the coordinates in the ``stores`` table.

    Intended to be used as an async context manager: the HTTP client used for
    geocoding requests is created in ``__aenter__`` and closed in
    ``__aexit__``. The database-only methods (``get_stores_without_coordinates``
    and ``update_store_coordinates``) use the Supabase client created in
    ``__init__`` and do not need the HTTP client.
    """

    def __init__(self):
        # Supabase client used for all reads/writes on the 'stores' table.
        self.supabase = SupabaseClient().get_client()
        # HTTP client; created on __aenter__, None until then.
        self.session = None

    async def __aenter__(self):
        """Open the HTTP client used for geocoding requests and return ``self``."""
        self.session = httpx.AsyncClient(
            timeout=30.0,
            headers={'User-Agent': 'SupaKuna-Geocoding/1.0'}
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP client if one was opened; exceptions are not suppressed."""
        if self.session:
            await self.session.aclose()

    def get_stores_without_coordinates(self) -> List[Dict]:
        """Fetch all stores whose ``coordinates`` column is null.

        Returns:
            The matching rows, or an empty list if the query fails (the error
            is logged, not raised).
        """
        try:
            response = self.supabase.table('stores').select('*').is_('coordinates', 'null').execute()
            logger.info(f"Found {len(response.data)} stores without coordinates")
            return response.data
        except Exception as e:
            logger.error(f"Error fetching stores without coordinates: {e}")
            return []

    @staticmethod
    def _redact(message: str, secret: Optional[str]) -> str:
        """Return ``message`` with every occurrence of ``secret`` replaced by ``***``."""
        if not secret:
            return message
        from urllib.parse import quote

        # Mask the percent-encoded form first (as it would appear inside a
        # URL), then the raw form.
        for form in (quote(secret, safe=""), secret):
            message = message.replace(form, "***")
        return message

    def _prepare_query(self, address: str) -> str:
        """Clean ``address`` and append ', Croatia' when no country is named.

        Returns an empty string when the address is empty or whitespace only.
        """
        cleaned = self._clean_address(address)
        if not cleaned:
            return ""
        lowered = cleaned.lower()
        if 'croatia' not in lowered and 'hrvatska' not in lowered:
            cleaned += ', Croatia'
        return cleaned

    def _accept_coordinates(self, store_id: str, query: str, lat: float, lng: float) -> Optional[Tuple[float, float]]:
        """Return ``(lat, lng)`` if the point lies inside the Croatia bounding box, else None."""
        if self._validate_croatian_coordinates(lat, lng):
            logger.info(f"Geocoded store {store_id}: {query} -> ({lat}, {lng})")
            return (lat, lng)
        logger.warning(f"Coordinates outside Croatia for store {store_id}: ({lat}, {lng})")
        return None

    async def geocode_address_geoapify(self, address: str, store_id: str, api_key: str) -> Optional[Tuple[float, float]]:
        """Geocode an address with the Geoapify search API.

        Args:
            address: Raw store address.
            store_id: Store identifier, used only in log messages.
            api_key: Geoapify API key sent as the ``apiKey`` query parameter.

        Returns:
            ``(lat, lng)`` for the first result if it lies inside the Croatia
            bounding box; None when the address is empty, nothing was found,
            the result is outside the box, or any error occurred (errors are
            logged, not raised).
        """
        try:
            query = self._prepare_query(address)
            if not query:
                # Without this guard an empty address would be sent as the
                # meaningless query ", Croatia".
                logger.warning(f"No geocoding results for store {store_id}: {query}")
                return None

            url = "https://api.geoapify.com/v1/geocode/search"
            params = {
                'text': query,
                'apiKey': api_key,
                'limit': 1,
                'filter': 'countrycode:hr'
            }

            response = await self.session.get(url, params=params)
            response.raise_for_status()

            features = response.json().get('features')
            if not features:
                logger.warning(f"No geocoding results for store {store_id}: {query}")
                return None

            # The coordinate pair is read as longitude first, then latitude.
            lng, lat = features[0]['geometry']['coordinates'][:2]
            return self._accept_coordinates(store_id, query, lat, lng)

        except Exception as e:
            # The key travels in the query string, and HTTP status errors can
            # embed the full request URL in their message, so mask the key
            # before it reaches the log.
            logger.error(f"Error geocoding address for store {store_id}: {self._redact(str(e), api_key)}")
            return None

    async def geocode_address(self, address: str, store_id: str) -> Optional[Tuple[float, float]]:
        """Geocode an address with Nominatim (OpenStreetMap) - fallback method.

        Args:
            address: Raw store address.
            store_id: Store identifier, used only in log messages.

        Returns:
            ``(lat, lng)`` for the first result if it lies inside the Croatia
            bounding box; None when the address is empty, nothing was found,
            the result is outside the box, or any error occurred (errors are
            logged, not raised).
        """
        try:
            query = self._prepare_query(address)
            if not query:
                logger.warning(f"No geocoding results for store {store_id}: {query}")
                return None

            # Nominatim needs no API key.
            url = "https://nominatim.openstreetmap.org/search"
            params = {
                'q': query,
                'format': 'json',
                'limit': 1,
                'countrycodes': 'hr',  # Restrict to Croatia
                'addressdetails': 1
            }

            response = await self.session.get(url, params=params)
            response.raise_for_status()

            results = response.json()
            if not results:
                logger.warning(f"No geocoding results for store {store_id}: {query}")
                return None

            best = results[0]
            lat = float(best['lat'])
            lng = float(best['lon'])
            return self._accept_coordinates(store_id, query, lat, lng)

        except Exception as e:
            logger.error(f"Error geocoding address for store {store_id}: {e}")
            return None

    def _clean_address(self, address: str) -> str:
        """Collapse runs of whitespace and trim the ends; '' for a falsy address."""
        if not address:
            return ""

        return ' '.join(address.strip().split())

    def _validate_croatian_coordinates(self, lat: float, lng: float) -> bool:
        """Return True when the point lies inside Croatia's approximate bounding box."""
        min_lat, max_lat = 42.4, 46.6
        min_lng, max_lng = 13.5, 19.5

        return min_lat <= lat <= max_lat and min_lng <= lng <= max_lng

    def update_store_coordinates(self, store_id: str, lat: float, lng: float) -> bool:
        """Write the coordinates of one store to the database.

        Returns:
            True when the update returned data; False when it returned none or
            raised (errors are logged, not raised).
        """
        try:
            # POINT text is written longitude first, then latitude.
            response = self.supabase.table('stores').update({
                'coordinates': f'POINT({lng} {lat})'
            }).eq('store_id', store_id).execute()

            if response.data:
                logger.info(f"Updated coordinates for store {store_id}")
                return True

            logger.error(f"Failed to update coordinates for store {store_id}")
            return False

        except Exception as e:
            logger.error(f"Error updating coordinates for store {store_id}: {e}")
            return False

    async def batch_geocode_stores(self, api_key: Optional[str] = None) -> Dict[str, object]:
        """Batch geocode all stores without coordinates using Geoapify.

        Args:
            api_key: Geoapify API key; falls back to ``GEOAPIFY_API_KEY`` from
                settings when falsy.

        Returns:
            A summary dict with integer ``total``, ``success`` and ``failed``
            counts. When no API key is available the counts are all 0 and an
            ``error`` string is added.
        """
        stores = self.get_stores_without_coordinates()

        if not stores:
            logger.info("No stores found without coordinates")
            return {"total": 0, "success": 0, "failed": 0}

        # Use provided API key or fallback to settings
        api_key = api_key or GEOAPIFY_API_KEY

        if not api_key:
            logger.error("No Geoapify API key provided")
            return {"total": 0, "success": 0, "failed": 0, "error": "No API key"}

        total_stores = len(stores)
        successful_updates = 0
        failed_updates = 0

        logger.info(f"Starting batch geocoding for {total_stores} stores using Geoapify")

        for i, store in enumerate(stores, 1):
            try:
                store_id = store.get('store_id')
                address = store.get('store_address', '')

                if not store_id:
                    logger.warning(f"Store has no store_id: {store}")
                    failed_updates += 1
                    continue

                if not address:
                    logger.warning(f"Store {store_id} has no address")
                    failed_updates += 1
                    continue

                coordinates = await self.geocode_address_geoapify(address, store_id, api_key)

                if coordinates and self.update_store_coordinates(store_id, *coordinates):
                    successful_updates += 1
                else:
                    failed_updates += 1

                # Rate limiting: pause 0.1 s after each geocoding request.
                await asyncio.sleep(0.1)

                # Progress logging
                if i % 10 == 0:
                    logger.info(f"Processed {i}/{total_stores} stores")

            except Exception as e:
                # One bad store must not abort the whole batch.
                logger.error(f"Error processing store {store.get('store_id', 'unknown')}: {e}")
                failed_updates += 1

        result = {
            "total": total_stores,
            "success": successful_updates,
            "failed": failed_updates
        }

        logger.info(f"Batch geocoding completed: {result}")
        return result