AarnavNoble committed on
Commit
241aa0d
·
verified ·
1 Parent(s): 75f2953

Upload backend/services/overpass.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/services/overpass.py +156 -0
backend/services/overpass.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Overpass API client for fetching POIs from OpenStreetMap.
3
+ Completely free, no API key needed.
4
+ """
5
+
6
+ import httpx
7
+ from dataclasses import dataclass, field
8
+
9
+ OVERPASS_URL = "https://overpass-api.de/api/interpreter"
10
+
11
+ # OSM tags that map to travel interest categories
12
+ # Global chain brands to filter out — we want local places
13
+ CHAIN_BLOCKLIST = {
14
+ "starbucks", "mcdonald's", "mcdonalds", "burger king", "kfc", "subway",
15
+ "pizza hut", "domino's", "dominos", "dunkin", "dunkin'", "tim hortons",
16
+ "costa coffee", "costa", "nero", "caffe nero", "pret", "pret a manger",
17
+ "seven eleven", "7-eleven", "7eleven", "lawson", "familymart", "family mart",
18
+ "circle k", "spar", "aldi", "lidl", "walmart", "tesco", "sainsbury's",
19
+ "wendy's", "wendys", "taco bell", "popeyes", "chick-fil-a", "five guys",
20
+ "shake shack", "mcdonald", "ikea", "zara", "h&m", "uniqlo",
21
+ }
22
+
23
+
24
+ def _is_chain(name: str, tags: dict = None) -> bool:
25
+ if name.lower().strip() in CHAIN_BLOCKLIST:
26
+ return True
27
+ # OSM brand tag always has the English brand name even for non-English locations
28
+ brand = (tags or {}).get("brand", "").lower().strip()
29
+ return brand in CHAIN_BLOCKLIST
30
+
31
+
32
# OSM tag filters for each travel interest category.
# Every entry is the inside of one Overpass QL `[...]` tag filter.
# Overpass regex filters (key~"...") match anywhere inside the value, so the
# alternations are anchored with ^(...)$; unanchored, "pub" would also select
# amenity=public_bookcase and "park" would select leisure=water_park.
CATEGORY_QUERIES = {
    "food": ['amenity~"^(restaurant|cafe|bar|food_court|fast_food)$"'],
    "nature": [
        'leisure~"^(park|nature_reserve|garden)$"',
        'natural~"^(beach|waterfall|viewpoint)$"',
    ],
    "history": ['historic~"^(monument|castle|ruins|memorial|archaeological_site)$"'],
    "culture": [
        'tourism~"^(museum|gallery|artwork)$"',
        'amenity~"^(theatre|cinema)$"',
    ],
    "nightlife": ['amenity~"^(bar|nightclub|pub)$"'],
    "shopping": ['shop~"^(mall|market|department_store|boutique)$"'],
    "adventure": [
        'leisure~"^(climbing|sports_centre)$"',
        # sport=* may hold several ;-separated values, so match whole items
        # of that list rather than the whole string.
        'sport~"(^|;)(hiking|cycling)(;|$)"',
    ],
    # A bare key ("historic") is a key-existence filter: any value matches.
    "hidden_gems": ['tourism~"^(attraction|viewpoint)$"', 'historic'],
}
42
+
43
+
44
@dataclass
class POI:
    """A named point of interest together with its raw OSM tags."""

    id: int
    name: str
    lat: float
    lon: float
    category: str
    tags: dict = field(default_factory=dict)

    @property
    def description(self) -> str:
        """Return name, category and selected tag values as one string for embedding."""
        tag_keys = ("description", "cuisine", "sport", "historic", "tourism", "amenity", "leisure")
        tag_values = (self.tags.get(tag_key) for tag_key in tag_keys)
        # Missing/empty tags are skipped; underscores in tag values
        # (e.g. "fast_food") are turned into spaces.
        extras = [value.replace("_", " ") for value in tag_values if value]
        return " ".join([self.name, self.category, *extras])

    def to_dict(self) -> dict:
        """Return the POI as a plain dict, including the derived description."""
        payload = {
            attr: getattr(self, attr)
            for attr in ("id", "name", "lat", "lon", "category")
        }
        payload["description"] = self.description
        payload["tags"] = self.tags
        return payload
73
+
74
+
75
def _build_query(
    lat: float,
    lon: float,
    radius_m: int,
    categories: list[str],
    max_results: int = 100,
) -> str:
    """Build the Overpass QL query for the given categories around a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius_m: Search radius passed to the ``around`` filter.
        categories: Keys of CATEGORY_QUERIES; unknown keys contribute nothing.
        max_results: Element limit written into the ``out center`` statement.

    Returns:
        The query text: a JSON/timeout settings line, a union of ``node`` and
        ``way`` statements (one pair per distinct tag filter), and the
        ``out center`` statement.
    """
    # Fixed-point formatting keeps coordinates in plain decimal form; str()
    # would render very small values in exponent notation (e.g. "1e-05").
    around = f"(around:{radius_m},{float(lat):.7f},{float(lon):.7f})"

    # dict.fromkeys drops repeated filters (e.g. a category listed twice)
    # while keeping first-seen order.
    tag_exprs = dict.fromkeys(
        tag_expr
        for cat in categories
        for tag_expr in CATEGORY_QUERIES.get(cat, [])
    )

    statements = []
    for tag_expr in tag_exprs:
        for element_type in ("node", "way"):
            # [!"brand"] excludes chain restaurants/shops at the query level
            statements.append(f'{element_type}[{tag_expr}][!"brand"]{around};')

    union = "\n".join(statements)
    return (
        "\n"
        "[out:json][timeout:25];\n"
        "(\n"
        f"{union}\n"
        ");\n"
        f"out center {max_results};\n"
    )
92
+
93
+
94
def fetch_pois(lat: float, lon: float, categories: list[str], radius_m: int = 5000) -> "list[POI]":
    """
    Fetch POIs from Overpass API around a coordinate.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        categories: Interest categories (keys of CATEGORY_QUERIES).
        radius_m: Search radius handed to the query builder.

    Returns:
        POIs built from the returned elements, skipping unnamed elements,
        blocklisted chains and elements without coordinates. The query
        caps the response at 100 elements, so at most 100 POIs come back.
        An empty list is returned without any request when none of the
        categories is known.

    Raises:
        httpx.HTTPStatusError: via ``raise_for_status()`` on an error response.
    """
    # Nothing to ask for: skip the round-trip instead of posting a query
    # whose union is empty.
    if not any(CATEGORY_QUERIES.get(cat) for cat in categories):
        return []

    query = _build_query(lat, lon, radius_m, categories)
    resp = httpx.post(OVERPASS_URL, data={"data": query}, timeout=30)
    resp.raise_for_status()

    pois = []
    for el in resp.json().get("elements", []):
        tags = el.get("tags", {})
        name = tags.get("name")
        if not name or _is_chain(name, tags):
            continue

        # get coordinates (nodes have lat/lon directly, ways have center)
        coords = el if el["type"] == "node" else el.get("center", {})
        lat_el, lon_el = coords.get("lat"), coords.get("lon")
        if lat_el is None or lon_el is None:
            continue

        pois.append(POI(
            id=el["id"],
            name=name,
            lat=lat_el,
            lon=lon_el,
            # category is inferred from the element's own tags, not from
            # the requested categories
            category=_infer_category(tags),
            tags=tags,
        ))

    return pois
137
+
138
+
139
+ def _infer_category(tags: dict) -> str:
140
+ amenity = tags.get("amenity", "")
141
+ tourism = tags.get("tourism", "")
142
+ historic = tags.get("historic", "")
143
+ leisure = tags.get("leisure", "")
144
+ natural = tags.get("natural", "")
145
+
146
+ if amenity in ("restaurant", "cafe", "bar", "food_court", "fast_food"):
147
+ return "food"
148
+ if amenity in ("nightclub", "pub"):
149
+ return "nightlife"
150
+ if tourism in ("museum", "gallery"):
151
+ return "culture"
152
+ if historic:
153
+ return "history"
154
+ if leisure in ("park", "nature_reserve", "garden") or natural:
155
+ return "nature"
156
+ return "attraction"