"""Read-only audit for listings missing usable geolocation.

Prints listings where latitude/longitude are missing, null, blank,
non-numeric, or outside valid coordinate ranges.
"""

import asyncio
import math
from collections import Counter
from typing import Any, Optional

from app.database import connect_db, disconnect_db, get_db


def _to_float(value: Any) -> Optional[float]:
    """Convert a stored coordinate value to a float, or None if unusable.

    Args:
        value: Raw field value taken from a listing document.

    Returns:
        The value as a float, or None when it is None, a bool, a blank
        string, NaN, or anything ``float()`` cannot convert.
    """
    # bool is a subclass of int, so float(True) would silently become 1.0
    # and pass the range checks as if it were a real coordinate.
    if value is None or isinstance(value, bool):
        return None
    if isinstance(value, str) and not value.strip():
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return None
    # float("nan") parses successfully but is not a usable number; report it
    # as invalid rather than letting it fall through to the range checks.
    if math.isnan(number):
        return None
    return number


def _geo_problem(doc: dict) -> Optional[str]:
    """Describe what is wrong with a listing's coordinates, if anything.

    Checks run in order: field presence, then convertibility to a number,
    then coordinate range. Only the first failing category is reported.

    Args:
        doc: Listing document; only the ``latitude`` and ``longitude`` keys
            are inspected.

    Returns:
        A short human-readable problem description, or None when both
        coordinates are present, numeric, and within range.
    """
    has_lat = "latitude" in doc
    has_lon = "longitude" in doc

    if not has_lat and not has_lon:
        return "missing latitude and longitude fields"
    if not has_lat:
        return "missing latitude field"
    if not has_lon:
        return "missing longitude field"

    lat = _to_float(doc.get("latitude"))
    lon = _to_float(doc.get("longitude"))

    if lat is None and lon is None:
        return "empty/invalid latitude and longitude"
    if lat is None:
        return "empty/invalid latitude"
    if lon is None:
        return "empty/invalid longitude"

    # Infinite values are caught here as out of range.
    if not -90 <= lat <= 90:
        return "latitude out of range"
    if not -180 <= lon <= 180:
        return "longitude out of range"

    return None


async def main() -> None:
    """Scan every listing and print those without usable coordinates.

    Connects to the database, reads all listings with a narrow projection,
    prints summary counts (overall, by status, by problem) followed by one
    detail entry per problematic listing, and always disconnects afterwards.
    Nothing is written to the database by this function.
    """
    await connect_db()
    try:
        db = await get_db()

        total = await db.listings.count_documents({})
        active_total = await db.listings.count_documents({"status": "active"})

        projection = {
            "title": 1,
            "location": 1,
            "address": 1,
            "status": 1,
            "listing_type": 1,
            "latitude": 1,
            "longitude": 1,
            "created_at": 1,
            "createdAt": 1,
        }
        # NOTE(review): both "created_at" and "createdAt" are projected but
        # only "created_at" drives the sort -- confirm which field the
        # documents actually carry.
        cursor = db.listings.find({}, projection).sort("created_at", -1)

        missing = []
        async for doc in cursor:
            problem = _geo_problem(doc)
            if problem:
                missing.append((problem, doc))

        print("=== Listings Missing Usable Geolocation ===")
        print(f"Total listings: {total}")
        print(f"Active listings: {active_total}")
        print(f"Need geolocation: {len(missing)}")
        print()

        by_status = Counter()
        by_problem = Counter()
        for problem, doc in missing:
            # str() keeps the key hashable and the later sort homogeneous
            # even if a document stores a non-string status.
            by_status[str(doc.get("status") or "unknown")] += 1
            by_problem[problem] += 1

        print("By status:")
        for status, count in sorted(by_status.items()):
            print(f" {status}: {count}")
        print()

        print("By problem:")
        for problem, count in sorted(by_problem.items()):
            print(f" {problem}: {count}")
        print()

        for idx, (problem, doc) in enumerate(missing, start=1):
            # str() guards against a non-string title, which would otherwise
            # raise AttributeError on .replace and abort the whole report.
            title = str(doc.get("title") or "No title").replace("\n", " ")[:80]
            print(f"{idx}. {title}")
            print(f" id: {doc.get('_id')}")
            print(f" status: {doc.get('status') or 'unknown'}")
            print(f" type: {doc.get('listing_type') or 'unknown'}")
            print(f" location: {doc.get('location')}")
            print(f" address: {doc.get('address')}")
            print(f" latitude: {doc.get('latitude')!r}")
            print(f" longitude: {doc.get('longitude')!r}")
            print(f" problem: {problem}")
            print()
    finally:
        await disconnect_db()


if __name__ == "__main__":
    asyncio.run(main())